/root/doris/be/src/util/slice.h
| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  |  | 
| 18 |  | #pragma once | 
| 19 |  |  | 
| 20 |  | #include <assert.h> | 
| 21 |  | #include <stddef.h> | 
| 22 |  | #include <stdint.h> | 
| 23 |  | #include <string.h> | 
| 24 |  |  | 
| 25 |  | #include <iostream> | 
| 26 |  | #include <map> | 
| 27 |  | #include <string> | 
| 28 |  | #include <utility> | 
| 29 |  | #include <vector> | 
| 30 |  |  | 
| 31 |  | #include "vec/common/allocator.h" | 
| 32 |  |  | 
| 33 |  | namespace doris { | 
| 34 |  |  | 
| 35 |  | class faststring; | 
| 36 |  |  | 
| 37 |  | /// @brief A wrapper around externally allocated data. | 
| 38 |  | /// | 
| 39 |  | /// Slice is a simple structure containing a pointer into some external | 
| 40 |  | /// storage and a size. The user of a Slice must ensure that the slice | 
| 41 |  | /// is not used after the corresponding external storage has been | 
| 42 |  | /// deallocated. | 
| 43 |  | /// | 
| 44 |  | /// Multiple threads can invoke const methods on a Slice without | 
| 45 |  | /// external synchronization, but if any of the threads may call a | 
| 46 |  | /// non-const method, all threads accessing the same Slice must use | 
| 47 |  | /// external synchronization. | 
| 48 |  | struct Slice { | 
| 49 |  | public: | 
| 50 |  |     char* data = nullptr; | 
| 51 |  |     size_t size; | 
| 52 |  |     // Intentionally copyable | 
| 53 |  |  | 
| 54 |  |     /// Create an empty slice. | 
| 55 | 36.5M |     Slice() : data(const_cast<char*>("")), size(0) {} | 
| 56 |  |  | 
| 57 |  |     /// Create a slice that refers to a @c char byte array. | 
| 58 | 909M |     Slice(const char* d, size_t n) : data(const_cast<char*>(d)), size(n) {} | 
| 59 |  |  | 
| 60 |  |     // Create a slice that refers to a @c uint8_t byte array. | 
| 61 |  |     // | 
| 62 |  |     // @param [in] d | 
| 63 |  |     //   The input array. | 
| 64 |  |     // @param [in] n | 
| 65 |  |     //   Number of bytes in the array. | 
| 66 |  |     Slice(const uint8_t* s, size_t n) | 
| 67 | 82.0M |             : data(const_cast<char*>(reinterpret_cast<const char*>(s))), size(n) {} | 
| 68 |  |  | 
| 69 |  |     /// Create a slice that refers to the contents of the given string. | 
| 70 |  |     Slice(const std::string& s) | 
| 71 |  |             : // NOLINT(runtime/explicit) | 
| 72 |  |               data(const_cast<char*>(s.data())), | 
| 73 | 22.1M |               size(s.size()) {} | 
| 74 |  |  | 
| 75 |  |     Slice(const faststring& s); | 
| 76 |  |  | 
| 77 |  |     /// Create a slice that refers to a C-string s[0,strlen(s)-1]. | 
| 78 |  |     Slice(const char* s) | 
| 79 |  |             : // NOLINT(runtime/explicit) | 
| 80 |  |               data(const_cast<char*>(s)), | 
| 81 | 2.32k |               size(strlen(s)) {} | 
| 82 |  |  | 
| 83 |  |     /// default copy/move constructor and assignment | 
| 84 |  |     Slice(const Slice&) = default; | 
| 85 |  |     Slice& operator=(const Slice&) = default; | 
| 86 |  |     Slice(Slice&&) noexcept = default; | 
| 87 |  |     Slice& operator=(Slice&&) noexcept = default; | 
| 88 |  |  | 
| 89 |  |     /// @return A pointer to the beginning of the referenced data. | 
| 90 | 96.4M |     const char* get_data() const { return data; } | 
| 91 |  |  | 
| 92 |  |     /// @return A mutable pointer to the beginning of the referenced data. | 
| 93 | 60.8k |     char* mutable_data() { return const_cast<char*>(data); } | 
| 94 |  |  | 
| 95 |  |     /// @return The length (in bytes) of the referenced data. | 
| 96 | 156M |     size_t get_size() const { return size; } | 
| 97 |  |  | 
| 98 |  |     /// @return @c true iff the length of the referenced data is zero. | 
| 99 | 196M |     bool empty() const { return size == 0; } | 
| 100 |  |  | 
| 101 |  |     /// @return the n-th byte in the referenced data. | 
| 102 | 298M |     const char& operator[](size_t n) const { | 
| 103 | 298M |         assert(n < size); | 
| 104 | 0 |         return data[n]; | 
| 105 | 298M |     } | 
| 106 |  |  | 
| 107 |  |     /// Change this slice to refer to an empty array. | 
| 108 | 0 |     void clear() { | 
| 109 | 0 |         data = const_cast<char*>(""); | 
| 110 | 0 |         size = 0; | 
| 111 | 0 |     } | 
| 112 |  |  | 
| 113 |  |     /// Drop the first "n" bytes from this slice. | 
| 114 |  |     /// | 
| 115 |  |     /// @pre n <= size | 
| 116 |  |     /// | 
| 117 |  |     /// @note Only the base and bounds of the slice are changed; | 
| 118 |  |     ///   the data is not modified. | 
| 119 |  |     /// | 
| 120 |  |     /// @param [in] n | 
| 121 |  |     ///   Number of bytes that should be dropped from the beginning. | 
| 122 | 2.02M |     void remove_prefix(size_t n) { | 
| 123 | 2.02M |         assert(n <= size); | 
| 124 | 0 |         data += n; | 
| 125 | 2.02M |         size -= n; | 
| 126 | 2.02M |     } | 
| 127 |  |  | 
| 128 |  |     /// Drop the last "n" bytes from this slice. | 
| 129 |  |     /// | 
| 130 |  |     /// @pre n <= size | 
| 131 |  |     /// | 
| 132 |  |     /// @note Only the base and bounds of the slice are changed; | 
| 133 |  |     ///   the data is not modified. | 
| 134 |  |     /// | 
| 135 |  |     /// @param [in] n | 
| 136 |  |     ///   Number of bytes that should be dropped from the last. | 
| 137 | 2.65M |     void remove_suffix(size_t n) { | 
| 138 | 2.65M |         assert(n <= size); | 
| 139 | 0 |         size -= n; | 
| 140 | 2.65M |     } | 
| 141 |  |  | 
| 142 |  |     /// Remove leading spaces. | 
| 143 |  |     /// | 
| 144 |  |     /// @pre n <= size | 
| 145 |  |     /// | 
| 146 |  |     /// @note Only the base and bounds of the slice are changed; | 
| 147 |  |     ///   the data is not modified. | 
| 148 |  |     /// | 
| 149 |  |     /// @param [in] n | 
| 150 |  |     ///   Number of bytes of space that should be dropped from the beginning. | 
| 151 | 6.99M |     void trim_prefix() { | 
| 152 | 6.99M |         int32_t begin = 0; | 
| 153 | 7.29M |         while (begin < size && data[begin] == ' ') { | 
| 154 | 295k |             data += 1; | 
| 155 | 295k |             size -= 1; | 
| 156 | 295k |         } | 
| 157 | 6.99M |     } | 
| 158 |  |  | 
| 159 |  |     /// Remove quote char '"' or ''' which should exist as first and last char. | 
| 160 |  |     /// | 
| 161 |  |     /// @pre n <= size | 
| 162 |  |     /// | 
| 163 |  |     /// @note Only the base and bounds of the slice are changed; | 
| 164 |  |     ///   the data is not modified. | 
| 165 |  |     /// | 
| 166 |  |     /// @param [in] n | 
| 167 |  |     ///   Number of bytes of space that should be dropped from the beginning. | 
| 168 | 10.3M |     bool trim_quote() { | 
| 169 | 10.3M |         int32_t begin = 0; | 
| 170 | 10.3M |         bool change = false; | 
| 171 | 10.3M |         if (size >= 2 && ((data[begin] == '"' && data[size - 1] == '"') || | 
| 172 | 9.58M |                           (data[begin] == '\'' && data[size - 1] == '\''))) { | 
| 173 | 3.94M |             data += 1; | 
| 174 | 3.94M |             size -= 2; | 
| 175 | 3.94M |             change = true; | 
| 176 | 3.94M |         } | 
| 177 | 10.3M |         return change; | 
| 178 | 10.3M |     } | 
| 179 |  |  | 
| 180 |  |     /// Remove quote char '"' which should exist as first and last char. | 
| 181 |  |     /// | 
| 182 |  |     /// @pre n <= size | 
| 183 |  |     /// | 
| 184 |  |     /// @note Only the base and bounds of the slice are changed; | 
| 185 |  |     ///   the data is not modified. | 
| 186 |  |     /// | 
| 187 |  |     /// @param [in] n | 
| 188 |  |     ///   Number of bytes of space that should be dropped from the beginning. | 
| 189 | 3.38k |     bool trim_double_quotes() { | 
| 190 | 3.38k |         int32_t begin = 0; | 
| 191 | 3.38k |         if (size >= 2 && (data[begin] == '"' && data[size - 1] == '"')) { | 
| 192 | 41 |             data += 1; | 
| 193 | 41 |             size -= 2; | 
| 194 | 41 |             return true; | 
| 195 | 41 |         } | 
| 196 | 3.34k |         return false; | 
| 197 | 3.38k |     } | 
| 198 |  |  | 
| 199 |  |     /// Truncate the slice to the given number of bytes. | 
| 200 |  |     /// | 
| 201 |  |     /// @pre n <= size | 
| 202 |  |     /// | 
| 203 |  |     /// @note Only the base and bounds of the slice are changed; | 
| 204 |  |     ///   the data is not modified. | 
| 205 |  |     /// | 
| 206 |  |     /// @param [in] n | 
| 207 |  |     ///   The new size of the slice. | 
| 208 |  |     void truncate(size_t n) { | 
| 209 |  |         assert(n <= size); | 
| 210 |  |         size = n; | 
| 211 |  |     } | 
| 212 |  |  | 
| 213 |  |     /// @return A string that contains a copy of the referenced data. | 
| 214 | 818k |     std::string to_string() const { return std::string(data, size); } | 
| 215 |  |  | 
| 216 |  |     /// Do a three-way comparison of the slice's data. | 
| 217 |  |     int compare(const Slice& b) const; | 
| 218 |  |  | 
| 219 |  |     /// Check whether the slice starts with the given prefix. | 
| 220 | 0 |     bool starts_with(const Slice& x) const { | 
| 221 | 0 |         return ((size >= x.size) && (mem_equal(data, x.data, x.size))); | 
| 222 | 0 |     } | 
| 223 |  |  | 
| 224 | 364 |     bool ends_with(const Slice& x) const { | 
| 225 | 364 |         return ((size >= x.size) && mem_equal(data + (size - x.size), x.data, x.size)); | 
| 226 | 364 |     } | 
| 227 |  |  | 
| 228 |  |     /// @brief Comparator struct, useful for ordered collections (like STL maps). | 
| 229 |  |     struct Comparator { | 
| 230 |  |         /// Compare two slices using Slice::compare() | 
| 231 |  |         /// | 
| 232 |  |         /// @param [in] a | 
| 233 |  |         ///   The slice to call Slice::compare() at. | 
| 234 |  |         /// @param [in] b | 
| 235 |  |         ///   The slice to use as a parameter for Slice::compare(). | 
| 236 |  |         /// @return @c true iff @c a is less than @c b by Slice::compare(). | 
| 237 | 0 |         bool operator()(const Slice& a, const Slice& b) const { return a.compare(b) < 0; } | 
| 238 |  |     }; | 
| 239 |  |  | 
| 240 |  |     /// Relocate/copy the slice's data into a new location. | 
| 241 |  |     /// | 
| 242 |  |     /// @param [in] d | 
| 243 |  |     ///   The new location for the data. If it's the same location, then no | 
| 244 |  |     ///   relocation is done. It is assumed that the new location is | 
| 245 |  |     ///   large enough to fit the data. | 
| 246 | 14.6M |     void relocate(char* d) { | 
| 247 | 14.6M |         if (data != d) { | 
| 248 | 14.6M |             memcpy(d, data, size); | 
| 249 | 14.6M |             data = d; | 
| 250 | 14.6M |         } | 
| 251 | 14.6M |     } | 
| 252 |  |  | 
| 253 |  |     friend bool operator==(const Slice& x, const Slice& y); | 
| 254 |  |  | 
| 255 |  |     friend std::ostream& operator<<(std::ostream& os, const Slice& slice); | 
| 256 |  |  | 
| 257 | 173M |     static bool mem_equal(const void* a, const void* b, size_t n) { return memcmp(a, b, n) == 0; } | 
| 258 |  |  | 
| 259 | 449M |     static int mem_compare(const void* a, const void* b, size_t n) { return memcmp(a, b, n); } | 
| 260 |  |  | 
| 261 | 3.73M |     static size_t compute_total_size(const std::vector<Slice>& slices) { | 
| 262 | 3.73M |         size_t total_size = 0; | 
| 263 | 3.87M |         for (auto& slice : slices) { | 
| 264 | 3.87M |             total_size += slice.size; | 
| 265 | 3.87M |         } | 
| 266 | 3.73M |         return total_size; | 
| 267 | 3.73M |     } | 
| 268 |  |  | 
| 269 | 0 |     static std::string to_string(const std::vector<Slice>& slices) { | 
| 270 | 0 |         std::string buf; | 
| 271 | 0 |         for (auto& slice : slices) { | 
| 272 | 0 |             buf.append(slice.data, slice.size); | 
| 273 | 0 |         } | 
| 274 | 0 |         return buf; | 
| 275 | 0 |     } | 
| 276 |  |  | 
| 277 |  |     // X is (maybe) a truncated prefix of string X' | 
| 278 |  |     // Y is (maybe) a truncated prefix of string Y' | 
| 279 |  |     // return true only if we can determine that X' is strictly less than Y' | 
| 280 |  |     // based on these maybe truncated prefixes | 
| 281 |  |     static bool lhs_is_strictly_less_than_rhs(Slice X, bool X_is_truncated, Slice Y, | 
| 282 |  |                                               bool Y_is_truncated); | 
| 283 |  | }; | 
| 284 |  |  | 
| 285 | 0 | inline std::ostream& operator<<(std::ostream& os, const Slice& slice) { | 
| 286 | 0 |     os << slice.to_string(); | 
| 287 | 0 |     return os; | 
| 288 | 0 | } | 
| 289 |  |  | 
| 290 |  | /// Check whether two slices are identical. | 
| 291 | 174M | inline bool operator==(const Slice& x, const Slice& y) { | 
| 292 | 174M |     return ((x.size == y.size) && (Slice::mem_equal(x.data, y.data, x.size))); | 
| 293 | 174M | } | 
| 294 |  |  | 
| 295 |  | /// Check whether two slices are not identical. | 
| 296 | 30.1k | inline bool operator!=(const Slice& x, const Slice& y) { | 
| 297 | 30.1k |     return !(x == y); | 
| 298 | 30.1k | } | 
| 299 |  |  | 
| 300 | 450M | inline int Slice::compare(const Slice& b) const { | 
| 301 | 450M |     const int min_len = (size < b.size) ? size : b.size; | 
| 302 | 450M |     int r = mem_compare(data, b.data, min_len); | 
| 303 | 450M |     if (r == 0) { | 
| 304 | 27.0M |         if (size < b.size) | 
| 305 | 20.7M |             r = -1; | 
| 306 | 6.26M |         else if (size > b.size) | 
| 307 | 2.21M |             r = +1; | 
| 308 | 27.0M |     } | 
| 309 | 450M |     return r; | 
| 310 | 450M | } | 
| 311 |  |  | 
| 312 |  | /// @brief STL map whose keys are Slices. | 
| 313 |  | /// | 
| 314 |  | /// An example of usage: | 
| 315 |  | /// @code | 
| 316 |  | ///   typedef SliceMap<int>::type MySliceMap; | 
| 317 |  | /// | 
| 318 |  | ///   MySliceMap my_map; | 
| 319 |  | ///   my_map.insert(MySliceMap::value_type(a, 1)); | 
| 320 |  | ///   my_map.insert(MySliceMap::value_type(b, 2)); | 
| 321 |  | ///   my_map.insert(MySliceMap::value_type(c, 3)); | 
| 322 |  | /// | 
| 323 |  | ///   for (const MySliceMap::value_type& pair : my_map) { | 
| 324 |  | ///     ... | 
| 325 |  | ///   } | 
| 326 |  | /// @endcode | 
| 327 |  | template <typename T> | 
| 328 |  | struct SliceMap { | 
| 329 |  |     /// A handy typedef for the slice map with appropriate comparison operator. | 
| 330 |  |     typedef std::map<Slice, T, Slice::Comparator> type; | 
| 331 |  | }; | 
| 332 |  |  | 
| 333 |  | // A move-only type which manage the lifecycle of externally allocated data. | 
| 334 |  | // Unlike std::unique_ptr<uint8_t[]>, OwnedSlice remembers the size of data so that clients can access | 
| 335 |  | // the underlying buffer as a Slice. | 
| 336 |  | // | 
| 337 |  | // Usage example: | 
| 338 |  | //   OwnedSlice read_page(PagePointer pp); | 
| 339 |  | //   { | 
| 340 |  | //     OwnedSlice page_data(new uint8_t[pp.size], pp.size); | 
| 341 |  | //     Status s = _file.read_at(pp.offset, owned.slice()); | 
| 342 |  | //     if (!s.ok()) { | 
| 343 |  | //       return s; // `page_data` destructs, deallocate underlying buffer | 
| 344 |  | //     } | 
| 345 |  | //     return page_data; // transfer ownership of buffer into the caller | 
| 346 |  | //   } | 
| 347 |  | // | 
| 348 |  | // only receive the memory allocated by Allocator and disables mmap, | 
| 349 |  | // otherwise the memory may not be freed correctly, currently only be constructed by faststring. | 
| 350 |  | class OwnedSlice : private Allocator<false, false, false, DefaultMemoryAllocator> { | 
| 351 |  | public: | 
| 352 | 6.73M |     OwnedSlice() : _slice((uint8_t*)nullptr, 0) {} | 
| 353 |  |  | 
| 354 |  |     OwnedSlice(size_t length) | 
| 355 |  |             : _slice(reinterpret_cast<char*>(Allocator::alloc(length)), length), | 
| 356 | 289 |               _capacity(length) {} | 
| 357 |  |  | 
| 358 | 2.58M |     OwnedSlice(OwnedSlice&& src) : _slice(src._slice), _capacity(src._capacity) { | 
| 359 | 2.58M |         src._slice.data = nullptr; | 
| 360 | 2.58M |         src._slice.size = 0; | 
| 361 | 2.58M |         src._capacity = 0; | 
| 362 | 2.58M |     } | 
| 363 |  |  | 
| 364 | 4.13M |     OwnedSlice& operator=(OwnedSlice&& src) { | 
| 365 | 4.13M |         if (this != &src) { | 
| 366 | 4.13M |             std::swap(_slice, src._slice); | 
| 367 | 4.13M |             std::swap(_capacity, src._capacity); | 
| 368 | 4.13M |         } | 
| 369 | 4.13M |         return *this; | 
| 370 | 4.13M |     } | 
| 371 |  |  | 
| 372 |  |     // disable copy constructor and copy assignment | 
| 373 |  |     OwnedSlice(const OwnedSlice&) = delete; | 
| 374 |  |     void operator=(const OwnedSlice&) = delete; | 
| 375 |  |  | 
| 376 | 11.7M |     ~OwnedSlice() { | 
| 377 | 11.7M |         if (_slice.data != nullptr) { | 
| 378 | 2.38M |             DCHECK(_capacity != 0); | 
| 379 | 2.38M |             Allocator::free(_slice.data, _capacity); | 
| 380 | 2.38M |         } | 
| 381 | 11.7M |     } | 
| 382 |  |  | 
| 383 | 3.73k |     char* data() const { return _slice.data; } | 
| 384 |  |  | 
| 385 | 9.87M |     const Slice& slice() const { return _slice; } | 
| 386 |  |  | 
| 387 |  | private: | 
| 388 |  |     // faststring also inherits Allocator and disables mmap. | 
| 389 |  |     friend class faststring; | 
| 390 |  |  | 
| 391 |  |     OwnedSlice(uint8_t* _data, size_t size, size_t capacity) | 
| 392 | 2.38M |             : _slice(_data, size), _capacity(capacity) {} | 
| 393 |  |  | 
| 394 |  |     Slice _slice; | 
| 395 |  |     size_t _capacity = 0; | 
| 396 |  | }; | 
| 397 |  |  | 
| 398 |  | } // namespace doris |