/root/doris/be/src/vec/common/arena.h
| Line | Count | Source | 
| 1 |  | // Licensed to the Apache Software Foundation (ASF) under one | 
| 2 |  | // or more contributor license agreements.  See the NOTICE file | 
| 3 |  | // distributed with this work for additional information | 
| 4 |  | // regarding copyright ownership.  The ASF licenses this file | 
| 5 |  | // to you under the Apache License, Version 2.0 (the | 
| 6 |  | // "License"); you may not use this file except in compliance | 
| 7 |  | // with the License.  You may obtain a copy of the License at | 
| 8 |  | // | 
| 9 |  | //   http://www.apache.org/licenses/LICENSE-2.0 | 
| 10 |  | // | 
| 11 |  | // Unless required by applicable law or agreed to in writing, | 
| 12 |  | // software distributed under the License is distributed on an | 
| 13 |  | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
| 14 |  | // KIND, either express or implied.  See the License for the | 
| 15 |  | // specific language governing permissions and limitations | 
| 16 |  | // under the License. | 
| 17 |  | // This file is copied from | 
| 18 |  | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Arena.h | 
| 19 |  | // and modified by Doris | 
| 20 |  |  | 
| 21 |  | #pragma once | 
| 22 |  |  | 
| 23 |  | #include <common/compiler_util.h> | 
| 24 |  | #include <sanitizer/asan_interface.h> | 
| 25 |  | #include <string.h> | 
| 26 |  |  | 
| 27 |  | #include <boost/noncopyable.hpp> | 
| 28 |  | #include <memory> | 
| 29 |  | #include <vector> | 
| 30 |  |  | 
| 31 |  | #include "vec/common/allocator.h" | 
| 32 |  | #include "vec/common/allocator_fwd.h" | 
| 33 |  |  | 
| 34 |  | namespace doris::vectorized { | 
| 35 |  |  | 
| 36 |  | /** Memory pool to append something. For example, short strings. | 
| 37 |  |   * Usage scenario: | 
| 38 |  |   * - put a lot of strings inside the pool, keep their addresses; | 
| 39 |  |   * - the addresses remain valid for the lifetime of the pool; | 
| 40 |  |   * - at destruction of pool, all memory is freed; | 
| 41 |  |   * - memory is allocated and freed by large chunks; | 
| 42 |  |   * - freeing parts of data is not possible (but look at ArenaWithFreeLists if you need); | 
| 43 |  |   */ | 
| 44 |  | class Arena : private boost::noncopyable { | 
| 45 |  | private: | 
| 46 |  |     /// Padding allows using 'memcpy_small_allow_read_write_overflow15' instead of 'memcpy'. | 
| 47 |  |     static constexpr size_t pad_right = 15; | 
| 48 |  |  | 
| 49 |  |     /// Contiguous chunk of memory and a pointer to the free space inside it. Member of a singly linked list. | 
| 50 |  |     struct alignas(16) Chunk : private Allocator<false> /// empty base optimization | 
| 51 |  |     { | 
| 52 |  |         char* begin = nullptr; | 
| 53 |  |         char* pos = nullptr; | 
| 54 |  |         char* end = nullptr; /// does not include padding. | 
| 55 |  |  | 
| 56 |  |         Chunk* prev = nullptr; | 
| 57 |  |  | 
| 58 | 42.1k |         Chunk(size_t size_, Chunk* prev_) { | 
| 59 | 42.1k |             begin = reinterpret_cast<char*>(Allocator<false>::alloc(size_)); | 
| 60 | 42.1k |             pos = begin; | 
| 61 | 42.1k |             end = begin + size_ - pad_right; | 
| 62 | 42.1k |             prev = prev_; | 
| 63 |  |  | 
| 64 | 42.1k |             ASAN_POISON_MEMORY_REGION(begin, size_); | 
| 65 | 42.1k |         } | 
| 66 |  |  | 
| 67 | 42.1k |         ~Chunk() { | 
| 68 |  |             /// We must unpoison the memory before returning to the allocator, | 
| 69 |  |             /// because the allocator might not have asan integration, and the | 
| 70 |  |             /// memory would stay poisoned forever. If the allocator supports | 
| 71 |  |             /// asan, it will correctly poison the memory by itself. | 
| 72 | 42.1k |             ASAN_UNPOISON_MEMORY_REGION(begin, size()); | 
| 73 |  |  | 
| 74 | 42.1k |             Allocator<false>::free(begin, size()); | 
| 75 |  |  | 
| 76 | 42.1k |             if (prev) delete prev; | 
| 77 | 42.1k |         } | 
| 78 |  |  | 
| 79 | 143k |         size_t size() const { return end + pad_right - begin; } | 
| 80 | 9 |         size_t remaining() const { return end - pos; } | 
| 81 | 10.1k |         size_t used() const { return pos - begin; } | 
| 82 |  |     }; | 
| 83 |  |  | 
| 84 |  |     size_t growth_factor = 2; | 
| 85 |  |     size_t linear_growth_threshold = 128 * 1024 * 1024; | 
| 86 |  |  | 
| 87 |  |     /// Last contiguous chunk of memory. | 
| 88 |  |     Chunk* head = nullptr; | 
| 89 |  |     size_t size_in_bytes = 0; | 
| 90 |  |     size_t _initial_size = 4096; | 
| 91 |  |     // The memory used by all chunks, excluding head. | 
| 92 |  |     size_t _used_size_no_head = 0; | 
| 93 |  |  | 
| 94 | 8.36k |     static size_t round_up_to_page_size(size_t s) { return (s + 4096 - 1) / 4096 * 4096; } | 
| 95 |  |  | 
| 96 |  | /// If the chunk size is less than 'linear_growth_threshold', use exponential growth; otherwise use linear growth | 
| 97 |  | ///  (so as not to allocate too much excess memory). | 
| 98 | 8.36k |     size_t next_size(size_t min_next_size) { | 
| 99 | 8.36k |         DCHECK(head != nullptr); | 
| 100 | 8.36k |         size_t size_after_grow = 0; | 
| 101 |  |  | 
| 102 | 8.36k |         if (head->size() < linear_growth_threshold) { | 
| 103 | 8.36k |             size_after_grow = std::max(min_next_size, head->size() * growth_factor); | 
| 104 | 8.36k |         } else { | 
| 105 |  |             // alloc_continue() combined with linear growth results in quadratic | 
| 106 |  |             // behavior: we append the data by small amounts, and when it | 
| 107 |  |             // doesn't fit, we create a new chunk and copy all the previous data | 
| 108 |  |             // into it. The number of times we do this is directly proportional | 
| 109 |  |             // to the total size of data that is going to be serialized. To make | 
| 110 |  |             // the copying happen less often, round the next size up to the | 
| 111 |  |             // linear_growth_threshold. | 
| 112 | 2 |             size_after_grow = | 
| 113 | 2 |                     ((min_next_size + linear_growth_threshold - 1) / linear_growth_threshold) * | 
| 114 | 2 |                     linear_growth_threshold; | 
| 115 | 2 |         } | 
| 116 |  |  | 
| 117 | 8.36k |         assert(size_after_grow >= min_next_size); | 
| 118 | 8.36k |         return round_up_to_page_size(size_after_grow); | 
| 119 | 8.36k |     } | 
| 120 |  |  | 
| 121 |  |     /// Add next contiguous chunk of memory with size not less than specified. | 
| 122 | 8.36k |     void NO_INLINE _add_chunk(size_t min_size) { | 
| 123 | 8.36k |         DCHECK(head != nullptr); | 
| 124 | 8.36k |         _used_size_no_head += head->used(); | 
| 125 | 8.36k |         head = new Chunk(next_size(min_size + pad_right), head); | 
| 126 | 8.36k |         size_in_bytes += head->size(); | 
| 127 | 8.36k |     } | 
| 128 |  |  | 
| 129 | 2.56M |     void _init_head_if_needed() { | 
| 130 | 2.56M |         if (UNLIKELY(head == nullptr)) { | 
| 131 | 33.7k |             head = new Chunk(_initial_size, nullptr); | 
| 132 | 33.7k |             size_in_bytes += head->size(); | 
| 133 | 33.7k |         } | 
| 134 | 2.56M |     } | 
| 135 |  |  | 
| 136 |  | public: | 
| 137 |  |     Arena(size_t initial_size_ = 4096, size_t growth_factor_ = 2, | 
| 138 |  |           size_t linear_growth_threshold_ = 128 * 1024 * 1024) | 
| 139 | 312k |             : growth_factor(growth_factor_), | 
| 140 | 312k |               linear_growth_threshold(linear_growth_threshold_), | 
| 141 | 312k |               _initial_size(initial_size_), | 
| 142 | 312k |               _used_size_no_head(0) {} | 
| 143 |  |  | 
| 144 | 312k |     ~Arena() { delete head; } | 
| 145 |  |  | 
| 146 |  | /// Get a piece of memory, without alignment. | 
| 147 | 2.52M |     char* alloc(size_t size) { | 
| 148 | 2.52M |         _init_head_if_needed(); | 
| 149 |  |  | 
| 150 | 2.52M |         if (UNLIKELY(head->pos + size > head->end)) { | 
| 151 | 8.34k |             _add_chunk(size); | 
| 152 | 8.34k |         } | 
| 153 |  |  | 
| 154 | 2.52M |         char* res = head->pos; | 
| 155 | 2.52M |         head->pos += size; | 
| 156 | 2.52M |         ASAN_UNPOISON_MEMORY_REGION(res, size + pad_right); | 
| 157 | 2.52M |         return res; | 
| 158 | 2.52M |     } | 
| 159 |  |  | 
| 160 |  | /// Get a piece of memory with the requested alignment. | 
| 161 | 40.5k |     char* aligned_alloc(size_t size, size_t alignment) { | 
| 162 | 40.5k |         _init_head_if_needed(); | 
| 163 |  |  | 
| 164 | 40.5k |         do { | 
| 165 | 40.5k |             void* head_pos = head->pos; | 
| 166 | 40.5k |             size_t space = head->end - head->pos; | 
| 167 |  |  | 
| 168 | 40.5k |             auto res = static_cast<char*>(std::align(alignment, size, head_pos, space)); | 
| 169 | 40.5k |             if (res) { | 
| 170 | 40.5k |                 head->pos = static_cast<char*>(head_pos); | 
| 171 | 40.5k |                 head->pos += size; | 
| 172 | 40.5k |                 ASAN_UNPOISON_MEMORY_REGION(res, size + pad_right); | 
| 173 | 40.5k |                 return res; | 
| 174 | 40.5k |             } | 
| 175 |  |  | 
| 176 | 18 |             _add_chunk(size + alignment); | 
| 177 | 18 |         } while (true); | 
| 178 | 40.5k |     } | 
| 179 |  |  | 
| 180 |  |     template <typename T> | 
| 181 | 40.4k |     T* alloc() { | 
| 182 | 40.4k |         return reinterpret_cast<T*>(aligned_alloc(sizeof(T), alignof(T))); | 
| 183 | 40.4k |     } | 
| 184 |  |  | 
| 185 |  | /** Roll back the allocation that was just performed. | 
| 186 |  |   * Must pass a size no greater than what was just allocated. | 
| 187 |  |     * Return the resulting head pointer, so that the caller can assert that | 
| 188 |  |     * the allocation it intended to roll back was indeed the last one. | 
| 189 |  |       */ | 
| 190 | 0 |     void* rollback(size_t size) { | 
| 191 | 0 |         DCHECK(head != nullptr); | 
| 192 | 0 |  | 
| 193 | 0 |         head->pos -= size; | 
| 194 | 0 |         ASAN_POISON_MEMORY_REGION(head->pos, size + pad_right); | 
| 195 | 0 |         return head->pos; | 
| 196 | 0 |     } | 
| 197 |  |  | 
| 198 |  |     /** Begin or expand a contiguous range of memory. | 
| 199 |  |       * 'range_start' is the start of range. If nullptr, a new range is | 
| 200 |  |       * allocated. | 
| 201 |  |       * If there is no space in the current chunk to expand the range, | 
| 202 |  |       * the entire range is copied to a new, bigger memory chunk, and the value | 
| 203 |  |       * of 'range_start' is updated. | 
| 204 |  |       * If the optional 'start_alignment' is specified, the start of range is | 
| 205 |  |       * kept aligned to this value. | 
| 206 |  |       * | 
| 207 |  |       * NOTE This method is usable only for the last allocation made on this | 
| 208 |  |       * Arena. For earlier allocations, see 'realloc' method. | 
| 209 |  |       */ | 
| 210 |  |     [[nodiscard]] char* alloc_continue(size_t additional_bytes, char const*& range_start, | 
| 211 | 25.3k |                                        size_t start_alignment = 0) { | 
| 212 | 25.3k |         if (!range_start) { | 
| 213 |  |             // Start a new memory range. | 
| 214 | 468 |             char* result = start_alignment ? aligned_alloc(additional_bytes, start_alignment) | 
| 215 | 468 |                                            : alloc(additional_bytes); | 
| 216 |  |  | 
| 217 | 468 |             range_start = result; | 
| 218 | 468 |             return result; | 
| 219 | 468 |         } | 
| 220 |  |  | 
| 221 | 25.3k |         DCHECK(head != nullptr); | 
| 222 |  |  | 
| 223 |  |         // Extend an existing memory range with 'additional_bytes'. | 
| 224 |  |  | 
| 225 |  | // This method only works for extending the last allocation. For lack of | 
| 226 |  | // the original size, check a weaker condition: that 'range_start' is at least in | 
| 227 |  |         // the current Chunk. | 
| 228 | 24.8k |         assert(range_start >= head->begin && range_start < head->end); | 
| 229 |  |  | 
| 230 | 24.8k |         if (head->pos + additional_bytes <= head->end) { | 
| 231 |  |             // The new size fits into the last chunk, so just alloc the | 
| 232 |  |             // additional size. We can alloc without alignment here, because it | 
| 233 |  |             // only applies to the start of the range, and we don't change it. | 
| 234 | 24.7k |             return alloc(additional_bytes); | 
| 235 | 24.7k |         } | 
| 236 |  |  | 
| 237 |  |         // New range doesn't fit into this chunk, will copy to a new one. | 
| 238 |  |         // | 
| 239 |  |         // Note: among other things, this method is used to provide a hack-ish | 
| 240 |  |         // implementation of realloc over Arenas in ArenaAllocators. It wastes a | 
| 241 |  |         // lot of memory -- quadratically so when we reach the linear allocation | 
| 242 |  |         // threshold. This deficiency is intentionally left as is, and should be | 
| 243 |  |         // solved not by complicating this method, but by rethinking the | 
| 244 |  |         // approach to memory management for aggregate function states, so that | 
| 245 |  |         // we can provide a proper realloc(). | 
| 246 | 44 |         const size_t existing_bytes = head->pos - range_start; | 
| 247 | 44 |         const size_t new_bytes = existing_bytes + additional_bytes; | 
| 248 | 44 |         const char* old_range = range_start; | 
| 249 |  |  | 
| 250 | 44 |         char* new_range = | 
| 251 | 44 |                 start_alignment ? aligned_alloc(new_bytes, start_alignment) : alloc(new_bytes); | 
| 252 |  |  | 
| 253 | 44 |         memcpy(new_range, old_range, existing_bytes); | 
| 254 |  |  | 
| 255 | 44 |         range_start = new_range; | 
| 256 | 44 |         return new_range + existing_bytes; | 
| 257 | 24.8k |     } | 
| 258 |  |  | 
| 259 |  |     /// NOTE Old memory region is wasted. | 
| 260 | 0 |     [[nodiscard]] char* realloc(const char* old_data, size_t old_size, size_t new_size) { | 
| 261 | 0 |         char* res = alloc(new_size); | 
| 262 | 0 |         if (old_data) { | 
| 263 | 0 |             memcpy(res, old_data, old_size); | 
| 264 | 0 |             ASAN_POISON_MEMORY_REGION(old_data, old_size); | 
| 265 | 0 |         } | 
| 266 | 0 |         return res; | 
| 267 | 0 |     } | 
| 268 |  |  | 
| 269 |  |     [[nodiscard]] char* aligned_realloc(const char* old_data, size_t old_size, size_t new_size, | 
| 270 | 0 |                                         size_t alignment) { | 
| 271 | 0 |         char* res = aligned_alloc(new_size, alignment); | 
| 272 | 0 |         if (old_data) { | 
| 273 | 0 |             memcpy(res, old_data, old_size); | 
| 274 | 0 |             ASAN_POISON_MEMORY_REGION(old_data, old_size); | 
| 275 | 0 |         } | 
| 276 | 0 |         return res; | 
| 277 | 0 |     } | 
| 278 |  |  | 
| 279 |  |     /// Insert string without alignment. | 
| 280 | 2.15M |     [[nodiscard]] const char* insert(const char* data, size_t size) { | 
| 281 | 2.15M |         char* res = alloc(size); | 
| 282 | 2.15M |         memcpy(res, data, size); | 
| 283 | 2.15M |         return res; | 
| 284 | 2.15M |     } | 
| 285 |  |  | 
| 286 | 0 |     [[nodiscard]] const char* aligned_insert(const char* data, size_t size, size_t alignment) { | 
| 287 | 0 |         char* res = aligned_alloc(size, alignment); | 
| 288 | 0 |         memcpy(res, data, size); | 
| 289 | 0 |         return res; | 
| 290 | 0 |     } | 
| 291 |  |  | 
| 292 |  |     /** | 
| 293 |  | * Delete all chunks except the head; the head is usually the largest chunk in the arena. | 
| 294 |  | * Consider the memory-reuse scenario: | 
| 295 |  | * 1. First time: allocate 64K from the arena, 4K at a time; the arena now holds 4 chunks of 4k, 8k, 16k, 32k. | 
| 296 |  | * 2. Then clear the arena; only the 32k chunk is kept. | 
| 297 |  | * 3. Second time: allocate 64K the same way; the arena now holds 2 chunks of 32k and 64k. | 
| 298 |  | * 4. Then clear the arena; only the 64k chunk is kept. | 
| 299 |  | * 5. Third time: allocate 64K the same way; the single 64k chunk is enough, so the memory is fully reused. | 
| 300 |  | * | 
| 301 |  | * Special case: once a chunk reaches 128M, it is no longer grown by a factor of 2. | 
| 302 |  | * If 4G is allocated, 128M at a time, only one 128M chunk is kept after clearing, | 
| 303 |  | * so only 128M can be reused when 4G is requested again. | 
| 304 |  |     */ | 
| 305 | 427k |     void clear(bool delete_head = false) { | 
| 306 | 427k |         if (head == nullptr) { | 
| 307 | 427k |             return; | 
| 308 | 427k |         } | 
| 309 |  |  | 
| 310 | 62 |         if (head->prev) { | 
| 311 | 6 |             delete head->prev; | 
| 312 | 6 |             head->prev = nullptr; | 
| 313 | 6 |         } | 
| 314 | 62 |         if (delete_head) { | 
| 315 | 3 |             delete head; | 
| 316 | 3 |             head = nullptr; | 
| 317 | 3 |             size_in_bytes = 0; | 
| 318 | 59 |         } else { | 
| 319 | 59 |             head->pos = head->begin; | 
| 320 | 59 |             size_in_bytes = head->size(); | 
| 321 | 59 |         } | 
| 322 | 62 |         _used_size_no_head = 0; | 
| 323 | 62 |     } | 
| 324 |  |  | 
| 325 |  |     /// Size of chunks in bytes. | 
| 326 | 28.5k |     size_t size() const { return size_in_bytes; } | 
| 327 |  |  | 
| 328 | 3.43k |     size_t used_size() const { | 
| 329 | 3.43k |         if (head == nullptr) { | 
| 330 | 1.64k |             return _used_size_no_head; | 
| 331 | 1.64k |         } | 
| 332 |  |  | 
| 333 | 1.78k |         return _used_size_no_head + head->used(); | 
| 334 | 3.43k |     } | 
| 335 |  |  | 
| 336 | 9 |     size_t remaining_space_in_current_chunk() const { | 
| 337 | 9 |         if (head == nullptr) { | 
| 338 | 0 |             return 0; | 
| 339 | 0 |         } | 
| 340 |  |  | 
| 341 | 9 |         return head->remaining(); | 
| 342 | 9 |     } | 
| 343 |  | }; | 
| 344 |  |  | 
| 345 |  | using ArenaPtr = std::shared_ptr<Arena>; | 
| 346 |  | using Arenas = std::vector<ArenaPtr>; | 
| 347 |  |  | 
| 348 |  | } // namespace doris::vectorized |
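
For context on how the interfaces covered above are typically exercised, here is a minimal usage sketch based only on the methods visible in this listing (`insert`, the templated `alloc<T>`, `size`, `used_size`). The include path mirrors the file path at the top of this report; the surrounding build setup is an assumption, not part of the report.

```cpp
#include <cstdint>
#include <iostream>
#include <string>

#include "vec/common/arena.h" // path as listed above; build wiring is assumed

int main() {
    using doris::vectorized::Arena;

    Arena arena(/*initial_size_=*/4096);

    // Copy short strings into the pool and keep the returned addresses;
    // they remain valid until the Arena itself is destroyed.
    const char* a = arena.insert("hello", 5);
    const char* b = arena.insert("world", 5);

    // Aligned allocation of a POD value via the templated alloc<T>().
    auto* counter = arena.alloc<int64_t>();
    *counter = 42;

    std::cout << std::string(a, 5) << ' ' << std::string(b, 5) << ' ' << *counter << '\n'
              << "chunk bytes: " << arena.size() << ", used: " << arena.used_size() << '\n';
    return 0; // every chunk is freed at once when 'arena' goes out of scope
}
```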
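
The `alloc_continue()` documentation above describes growing one contiguous range that may be relocated to a bigger chunk. A small sketch of that pattern follows; `append_twice` is a hypothetical helper name introduced here for illustration, not something from the Doris codebase.

```cpp
#include <cstring>

#include "vec/common/arena.h" // assumed include, as above

// Appends 'len' bytes twice to a single contiguous range tracked by
// 'range_start'. Pass range_start == nullptr before the first call:
// alloc_continue() then starts a new range, and it updates range_start
// whenever the existing data has to be copied into a bigger chunk.
inline size_t append_twice(doris::vectorized::Arena& arena, const char* part, size_t len,
                           const char*& range_start) {
    char* dst = arena.alloc_continue(len, range_start);
    memcpy(dst, part, len); // first piece, written at the end of the range
    dst = arena.alloc_continue(len, range_start);
    memcpy(dst, part, len); // second piece; the range stays contiguous
    return 2 * len;         // bytes now stored contiguously at range_start
}
```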
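
The reuse scenario described in the `clear()` comment can be driven as in the sketch below; the chunk sizes and iteration count are illustrative assumptions.

```cpp
#include <cstring>

#include "vec/common/arena.h" // assumed include, as above

// One round of the reuse pattern from the clear() comment: allocate roughly
// 64K in 4K pieces, then clear. clear() keeps only the head chunk (usually
// the largest), so later rounds need fewer and fewer fresh allocations.
void reuse_round(doris::vectorized::Arena& arena) {
    for (int i = 0; i < 16; ++i) {
        char* p = arena.alloc(4096); // 16 * 4K = 64K total
        memset(p, 0, 4096);
    }
    arena.clear(); // keep the head chunk; pass true to release it as well
}
```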