/root/doris/be/src/util/slice.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <assert.h> |
21 | | #include <stddef.h> |
22 | | #include <stdint.h> |
23 | | #include <string.h> |
24 | | |
25 | | #include <iostream> |
26 | | #include <map> |
27 | | #include <string> |
28 | | #include <utility> |
29 | | #include <vector> |
30 | | |
31 | | #include "vec/common/allocator.h" |
32 | | |
33 | | namespace doris { |
34 | | |
35 | | class faststring; |
36 | | |
37 | | /// @brief A wrapper around externally allocated data. |
38 | | /// |
39 | | /// Slice is a simple structure containing a pointer into some external |
40 | | /// storage and a size. The user of a Slice must ensure that the slice |
41 | | /// is not used after the corresponding external storage has been |
42 | | /// deallocated. |
43 | | /// |
44 | | /// Multiple threads can invoke const methods on a Slice without |
45 | | /// external synchronization, but if any of the threads may call a |
46 | | /// non-const method, all threads accessing the same Slice must use |
47 | | /// external synchronization. |
48 | | struct Slice { |
49 | | public: |
50 | | char* data = nullptr; |
51 | | size_t size; |
52 | | // Intentionally copyable |
53 | | |
54 | | /// Create an empty slice. |
55 | 1.59M | Slice() : data(const_cast<char*>("")), size(0) {} |
56 | | |
57 | | /// Create a slice that refers to a @c char byte array. |
58 | 2.90M | Slice(const char* d, size_t n) : data(const_cast<char*>(d)), size(n) {} |
59 | | |
60 | | // Create a slice that refers to a @c uint8_t byte array. |
61 | | // |
62 | | // @param [in] d |
63 | | // The input array. |
64 | | // @param [in] n |
65 | | // Number of bytes in the array. |
66 | | Slice(const uint8_t* s, size_t n) |
67 | 222k | : data(const_cast<char*>(reinterpret_cast<const char*>(s))), size(n) {} |
68 | | |
69 | | /// Create a slice that refers to the contents of the given string. |
70 | | Slice(const std::string& s) |
71 | | : // NOLINT(runtime/explicit) |
72 | | data(const_cast<char*>(s.data())), |
73 | 387k | size(s.size()) {} |
74 | | |
75 | | Slice(const faststring& s); |
76 | | |
77 | | /// Create a slice that refers to a C-string s[0,strlen(s)-1]. |
78 | | Slice(const char* s) |
79 | | : // NOLINT(runtime/explicit) |
80 | | data(const_cast<char*>(s)), |
81 | 1.16k | size(strlen(s)) {} |
82 | | |
83 | | /// default copy/move constructor and assignment |
84 | | Slice(const Slice&) = default; |
85 | | Slice& operator=(const Slice&) = default; |
86 | | Slice(Slice&&) noexcept = default; |
87 | | Slice& operator=(Slice&&) noexcept = default; |
88 | | |
89 | | /// @return A pointer to the beginning of the referenced data. |
90 | 1.65M | const char* get_data() const { return data; } |
91 | | |
92 | | /// @return A mutable pointer to the beginning of the referenced data. |
93 | 4 | char* mutable_data() { return const_cast<char*>(data); } |
94 | | |
95 | | /// @return The length (in bytes) of the referenced data. |
96 | 2.10M | size_t get_size() const { return size; } |
97 | | |
98 | | /// @return @c true iff the length of the referenced data is zero. |
99 | 47.4k | bool empty() const { return size == 0; } |
100 | | |
101 | | /// @return the n-th byte in the referenced data. |
102 | 110k | const char& operator[](size_t n) const { |
103 | 110k | assert(n < size); |
104 | 0 | return data[n]; |
105 | 110k | } |
106 | | |
107 | | /// Change this slice to refer to an empty array. |
108 | 0 | void clear() { |
109 | 0 | data = const_cast<char*>(""); |
110 | 0 | size = 0; |
111 | 0 | } |
112 | | |
113 | | /// Drop the first "n" bytes from this slice. |
114 | | /// |
115 | | /// @pre n <= size |
116 | | /// |
117 | | /// @note Only the base and bounds of the slice are changed; |
118 | | /// the data is not modified. |
119 | | /// |
120 | | /// @param [in] n |
121 | | /// Number of bytes that should be dropped from the beginning. |
122 | 34.5k | void remove_prefix(size_t n) { |
123 | 34.5k | assert(n <= size); |
124 | 0 | data += n; |
125 | 34.5k | size -= n; |
126 | 34.5k | } |
127 | | |
128 | | /// Drop the last "n" bytes from this slice. |
129 | | /// |
130 | | /// @pre n <= size |
131 | | /// |
132 | | /// @note Only the base and bounds of the slice are changed; |
133 | | /// the data is not modified. |
134 | | /// |
135 | | /// @param [in] n |
136 | | /// Number of bytes that should be dropped from the last. |
137 | 544 | void remove_suffix(size_t n) { |
138 | 544 | assert(n <= size); |
139 | 0 | size -= n; |
140 | 544 | } |
141 | | |
142 | | /// Remove leading spaces. |
143 | | /// |
144 | | /// @pre n <= size |
145 | | /// |
146 | | /// @note Only the base and bounds of the slice are changed; |
147 | | /// the data is not modified. |
148 | | /// |
149 | | /// @param [in] n |
150 | | /// Number of bytes of space that should be dropped from the beginning. |
151 | 839 | void trim_prefix() { |
152 | 839 | int32_t begin = 0; |
153 | 1.01k | while (begin < size && data[begin] == ' ') { |
154 | 174 | data += 1; |
155 | 174 | size -= 1; |
156 | 174 | } |
157 | 839 | } |
158 | | |
159 | | /// Remove quote char '"' or ''' which should exist as first and last char. |
160 | | /// |
161 | | /// @pre n <= size |
162 | | /// |
163 | | /// @note Only the base and bounds of the slice are changed; |
164 | | /// the data is not modified. |
165 | | /// |
166 | | /// @param [in] n |
167 | | /// Number of bytes of space that should be dropped from the beginning. |
168 | 598 | bool trim_quote() { |
169 | 598 | int32_t begin = 0; |
170 | 598 | bool change = false; |
171 | 598 | if (size >= 2 && ((data[begin] == '"' && data[size - 1] == '"') || |
172 | 532 | (data[begin] == '\'' && data[size - 1] == '\''))) { |
173 | 141 | data += 1; |
174 | 141 | size -= 2; |
175 | 141 | change = true; |
176 | 141 | } |
177 | 598 | return change; |
178 | 598 | } |
179 | | |
180 | | /// Remove quote char '"' which should exist as first and last char. |
181 | | /// |
182 | | /// @pre n <= size |
183 | | /// |
184 | | /// @note Only the base and bounds of the slice are changed; |
185 | | /// the data is not modified. |
186 | | /// |
187 | | /// @param [in] n |
188 | | /// Number of bytes of space that should be dropped from the beginning. |
189 | 0 | bool trim_double_quotes() { |
190 | 0 | int32_t begin = 0; |
191 | 0 | if (size >= 2 && (data[begin] == '"' && data[size - 1] == '"')) { |
192 | 0 | data += 1; |
193 | 0 | size -= 2; |
194 | 0 | return true; |
195 | 0 | } |
196 | 0 | return false; |
197 | 0 | } |
198 | | |
199 | | /// Truncate the slice to the given number of bytes. |
200 | | /// |
201 | | /// @pre n <= size |
202 | | /// |
203 | | /// @note Only the base and bounds of the slice are changed; |
204 | | /// the data is not modified. |
205 | | /// |
206 | | /// @param [in] n |
207 | | /// The new size of the slice. |
208 | 0 | void truncate(size_t n) { |
209 | 0 | assert(n <= size); |
210 | 0 | size = n; |
211 | 0 | } |
212 | | |
213 | | /// @return A string that contains a copy of the referenced data. |
214 | 39.6k | std::string to_string() const { return std::string(data, size); } |
215 | | |
216 | | /// Do a three-way comparison of the slice's data. |
217 | | int compare(const Slice& b) const; |
218 | | |
219 | | /// Check whether the slice starts with the given prefix. |
220 | 0 | bool starts_with(const Slice& x) const { |
221 | 0 | return ((size >= x.size) && (mem_equal(data, x.data, x.size))); |
222 | 0 | } |
223 | | |
224 | 11 | bool ends_with(const Slice& x) const { |
225 | 11 | return ((size >= x.size) && mem_equal(data + (size - x.size), x.data, x.size)); |
226 | 11 | } |
227 | | |
228 | | /// @brief Comparator struct, useful for ordered collections (like STL maps). |
229 | | struct Comparator { |
230 | | /// Compare two slices using Slice::compare() |
231 | | /// |
232 | | /// @param [in] a |
233 | | /// The slice to call Slice::compare() at. |
234 | | /// @param [in] b |
235 | | /// The slice to use as a parameter for Slice::compare(). |
236 | | /// @return @c true iff @c a is less than @c b by Slice::compare(). |
237 | 188k | bool operator()(const Slice& a, const Slice& b) const { return a.compare(b) < 0; } |
238 | | }; |
239 | | |
240 | | /// Relocate/copy the slice's data into a new location. |
241 | | /// |
242 | | /// @param [in] d |
243 | | /// The new location for the data. If it's the same location, then no |
244 | | /// relocation is done. It is assumed that the new location is |
245 | | /// large enough to fit the data. |
246 | 462 | void relocate(char* d) { |
247 | 462 | if (data != d) { |
248 | 462 | memcpy(d, data, size); |
249 | 462 | data = d; |
250 | 462 | } |
251 | 462 | } |
252 | | |
253 | | friend bool operator==(const Slice& x, const Slice& y); |
254 | | |
255 | | friend std::ostream& operator<<(std::ostream& os, const Slice& slice); |
256 | | |
257 | 19.2k | static bool mem_equal(const void* a, const void* b, size_t n) { return memcmp(a, b, n) == 0; } |
258 | | |
259 | 1.30M | static int mem_compare(const void* a, const void* b, size_t n) { return memcmp(a, b, n); } |
260 | | |
261 | 42.6k | static size_t compute_total_size(const std::vector<Slice>& slices) { |
262 | 42.6k | size_t total_size = 0; |
263 | 42.6k | for (auto& slice : slices) { |
264 | 42.6k | total_size += slice.size; |
265 | 42.6k | } |
266 | 42.6k | return total_size; |
267 | 42.6k | } |
268 | | |
269 | 0 | static std::string to_string(const std::vector<Slice>& slices) { |
270 | 0 | std::string buf; |
271 | 0 | for (auto& slice : slices) { |
272 | 0 | buf.append(slice.data, slice.size); |
273 | 0 | } |
274 | 0 | return buf; |
275 | 0 | } |
276 | | }; |
277 | | |
278 | 0 | inline std::ostream& operator<<(std::ostream& os, const Slice& slice) { |
279 | 0 | os << slice.to_string(); |
280 | 0 | return os; |
281 | 0 | } |
282 | | |
283 | | /// Check whether two slices are identical. |
284 | 19.2k | inline bool operator==(const Slice& x, const Slice& y) { |
285 | 19.2k | return ((x.size == y.size) && (Slice::mem_equal(x.data, y.data, x.size))); |
286 | 19.2k | } |
287 | | |
288 | | /// Check whether two slices are not identical. |
289 | 0 | inline bool operator!=(const Slice& x, const Slice& y) { |
290 | 0 | return !(x == y); |
291 | 0 | } |
292 | | |
293 | 1.30M | inline int Slice::compare(const Slice& b) const { |
294 | 1.30M | const int min_len = (size < b.size) ? size : b.size; |
295 | 1.30M | int r = mem_compare(data, b.data, min_len); |
296 | 1.30M | if (r == 0) { |
297 | 394k | if (size < b.size) |
298 | 798 | r = -1; |
299 | 393k | else if (size > b.size) |
300 | 967 | r = +1; |
301 | 394k | } |
302 | 1.30M | return r; |
303 | 1.30M | } |
304 | | |
305 | | /// @brief STL map whose keys are Slices. |
306 | | /// |
307 | | /// An example of usage: |
308 | | /// @code |
309 | | /// typedef SliceMap<int>::type MySliceMap; |
310 | | /// |
311 | | /// MySliceMap my_map; |
312 | | /// my_map.insert(MySliceMap::value_type(a, 1)); |
313 | | /// my_map.insert(MySliceMap::value_type(b, 2)); |
314 | | /// my_map.insert(MySliceMap::value_type(c, 3)); |
315 | | /// |
316 | | /// for (const MySliceMap::value_type& pair : my_map) { |
317 | | /// ... |
318 | | /// } |
319 | | /// @endcode |
320 | | template <typename T> |
321 | | struct SliceMap { |
322 | | /// A handy typedef for the slice map with appropriate comparison operator. |
323 | | typedef std::map<Slice, T, Slice::Comparator> type; |
324 | | }; |
325 | | |
326 | | // A move-only type which manage the lifecycle of externally allocated data. |
327 | | // Unlike std::unique_ptr<uint8_t[]>, OwnedSlice remembers the size of data so that clients can access |
328 | | // the underlying buffer as a Slice. |
329 | | // |
330 | | // Usage example: |
331 | | // OwnedSlice read_page(PagePointer pp); |
332 | | // { |
333 | | // OwnedSlice page_data(new uint8_t[pp.size], pp.size); |
334 | | // Status s = _file.read_at(pp.offset, owned.slice()); |
335 | | // if (!s.ok()) { |
336 | | // return s; // `page_data` destructs, deallocate underlying buffer |
337 | | // } |
338 | | // return page_data; // transfer ownership of buffer into the caller |
339 | | // } |
340 | | // |
341 | | // only receive the memory allocated by Allocator and disables mmap, |
342 | | // otherwise the memory may not be freed correctly, currently only be constructed by faststring. |
343 | | class OwnedSlice : private Allocator<false, false, false, DefaultMemoryAllocator> { |
344 | | public: |
345 | 70.2k | OwnedSlice() : _slice((uint8_t*)nullptr, 0) {} |
346 | | |
347 | | OwnedSlice(size_t length) |
348 | | : _slice(reinterpret_cast<char*>(Allocator::alloc(length)), length), |
349 | 98 | _capacity(length) {} |
350 | | |
351 | 23.5k | OwnedSlice(OwnedSlice&& src) : _slice(src._slice), _capacity(src._capacity) { |
352 | 23.5k | src._slice.data = nullptr; |
353 | 23.5k | src._slice.size = 0; |
354 | 23.5k | src._capacity = 0; |
355 | 23.5k | } |
356 | | |
357 | 42.8k | OwnedSlice& operator=(OwnedSlice&& src) { |
358 | 42.8k | if (this != &src) { |
359 | 42.8k | std::swap(_slice, src._slice); |
360 | 42.8k | std::swap(_capacity, src._capacity); |
361 | 42.8k | } |
362 | 42.8k | return *this; |
363 | 42.8k | } |
364 | | |
365 | | // disable copy constructor and copy assignment |
366 | | OwnedSlice(const OwnedSlice&) = delete; |
367 | | void operator=(const OwnedSlice&) = delete; |
368 | | |
369 | 120k | ~OwnedSlice() { |
370 | 120k | if (_slice.data != nullptr) { |
371 | 26.5k | DCHECK(_capacity != 0); |
372 | 26.5k | Allocator::free(_slice.data, _capacity); |
373 | 26.5k | } |
374 | 120k | } |
375 | | |
376 | 412 | char* data() const { return _slice.data; } |
377 | | |
378 | 98.8k | const Slice& slice() const { return _slice; } |
379 | | |
380 | | private: |
381 | | // faststring also inherits Allocator and disables mmap. |
382 | | friend class faststring; |
383 | | |
384 | | OwnedSlice(uint8_t* _data, size_t size, size_t capacity) |
385 | 26.4k | : _slice(_data, size), _capacity(capacity) {} |
386 | | |
387 | | Slice _slice; |
388 | | size_t _capacity = 0; |
389 | | }; |
390 | | |
391 | | } // namespace doris |