/root/doris/be/src/vec/common/arena.h

Source (jump to first uncovered line)
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Arena.h
// and modified by Doris

#pragma once

#include <common/compiler_util.h>
#include <string.h>

#include <algorithm>
#include <boost/noncopyable.hpp>
#include <cassert>
#include <cstddef>
#include <memory>
#include <vector>
#if __has_include(<sanitizer/asan_interface.h>)
#include <sanitizer/asan_interface.h>
#endif
#include "gutil/dynamic_annotations.h"
#include "vec/common/allocator.h"
#include "vec/common/allocator_fwd.h"
#include "vec/common/memcpy_small.h"

namespace doris::vectorized {

/** Memory pool to append something. For example, short strings.
  * Usage scenario:
  * - put a lot of strings inside the pool and keep their addresses;
  * - addresses remain valid during the lifetime of the pool (or until clear() is called);
  * - at destruction of the pool, all memory is freed;
  * - memory is allocated and freed in large chunks;
  * - freeing parts of the data is not possible (but look at ArenaWithFreeLists if you need that);
  */
class Arena : private boost::noncopyable {
private:
    /// Padding allows to use 'memcpy_small_allow_read_write_overflow15' instead of 'memcpy'.
    /// Every chunk keeps this many bytes past `end` that are never handed out.
    static constexpr size_t pad_right = 15;

    /// Contiguous chunk of memory and pointer to free space inside it. Member of single-linked list.
    /// A chunk owns its buffer and, through `prev`, every older chunk behind it.
    struct alignas(16) Chunk : private Allocator<false> /// empty base optimization
    {
        char* begin = nullptr; /// start of the buffer obtained from the allocator
        char* pos = nullptr;   /// first byte that has not been handed out yet
        char* end = nullptr;   /// does not include padding.

        Chunk* prev = nullptr; /// previously used chunk (owned), or nullptr

        /// Allocate a buffer of `size_` bytes (padding included) and link it in front of `prev_`.
        Chunk(size_t size_, Chunk* prev_) {
            begin = reinterpret_cast<char*>(Allocator<false>::alloc(size_));
            pos = begin;
            end = begin + size_ - pad_right;
            prev = prev_;

            // Nothing has been handed out yet, so the whole buffer starts poisoned.
            ASAN_POISON_MEMORY_REGION(begin, size_);
        }

        /// The chunk owns raw memory; copying it would lead to a double free.
        Chunk(const Chunk&) = delete;
        Chunk& operator=(const Chunk&) = delete;

        ~Chunk() {
            /// We must unpoison the memory before returning to the allocator,
            /// because the allocator might not have asan integration, and the
            /// memory would stay poisoned forever. If the allocator supports
            /// asan, it will correctly poison the memory by itself.
            ASAN_UNPOISON_MEMORY_REGION(begin, size());

            Allocator<false>::free(begin, size());

            // Destroy the older chunks iteratively. Each one is unlinked before it
            // is deleted, so its own destructor finds `prev == nullptr` and the
            // stack depth stays constant however long the chain is.
            Chunk* older = prev;
            prev = nullptr;
            while (older != nullptr) {
                Chunk* next = older->prev;
                older->prev = nullptr;
                delete older;
                older = next;
            }
        }

        /// Total buffer size in bytes, padding included.
        size_t size() const { return end + pad_right - begin; }
        /// Bytes still available before `end`.
        size_t remaining() const { return end - pos; }
        /// Bytes already handed out (including any alignment gaps).
        size_t used() const { return pos - begin; }

        /// True when `n` more bytes fit before `end`. Sizes are compared instead of
        /// forming `pos + n`, which is an out-of-range pointer (and may wrap around)
        /// when `n` is very large.
        bool can_fit(size_t n) const { return end >= pos && n <= static_cast<size_t>(end - pos); }
    };

    /// Multiplier applied to the head chunk size while growing exponentially.
    size_t growth_factor;
    /// Head chunk size from which growth switches from exponential to linear.
    size_t linear_growth_threshold;

    /// Last contiguous chunk of memory.
    Chunk* head = nullptr;
    /// Sum of size() over all chunks currently owned.
    size_t size_in_bytes;
    // The memory used by all chunks, excluding head.
    size_t _used_size_no_head;

    /// Round `s` up to a multiple of 4096.
    static size_t round_up_to_page_size(size_t s) { return (s + 4096 - 1) / 4096 * 4096; }

    /// If chunks size is less than 'linear_growth_threshold', then use exponential growth, otherwise - linear growth
    ///  (to not allocate too much excessive memory).
    /// `min_next_size` is the smallest acceptable chunk size (padding included);
    /// the result is never smaller and is rounded up to a multiple of 4096.
    size_t next_size(size_t min_next_size) const {
        size_t size_after_grow = 0;

        if (head->size() < linear_growth_threshold) {
            size_after_grow = std::max(min_next_size, head->size() * growth_factor);
        } else {
            // alloc_continue() combined with linear growth results in quadratic
            // behavior: we append the data by small amounts, and when it
            // doesn't fit, we create a new chunk and copy all the previous data
            // into it. The number of times we do this is directly proportional
            // to the total size of data that is going to be serialized. To make
            // the copying happen less often, round the next size up to the
            // linear_growth_threshold.
            size_after_grow =
                    ((min_next_size + linear_growth_threshold - 1) / linear_growth_threshold) *
                    linear_growth_threshold;
        }

        assert(size_after_grow >= min_next_size);
        return round_up_to_page_size(size_after_grow);
    }

    /// Add next contiguous chunk of memory with size not less than specified.
    /// The current head becomes `prev` of the new head; its used bytes are moved
    /// into `_used_size_no_head` so used_size() keeps counting them.
    void NO_INLINE add_chunk(size_t min_size) {
        _used_size_no_head += head->used();
        head = new Chunk(next_size(min_size + pad_right), head);
        size_in_bytes += head->size();
    }

    friend class ArenaAllocator;
    template <size_t>
    friend class AlignedArenaAllocator;

public:
    /// Create an arena with one chunk of `initial_size_` bytes (padding included).
    /// `growth_factor_` and `linear_growth_threshold_` control how later chunks
    /// are sized, see next_size().
    Arena(size_t initial_size_ = 4096, size_t growth_factor_ = 2,
          size_t linear_growth_threshold_ = 128 * 1024 * 1024)
            : growth_factor(growth_factor_),
              linear_growth_threshold(linear_growth_threshold_),
              head(new Chunk(initial_size_, nullptr)),
              size_in_bytes(head->size()),
              _used_size_no_head(0) {}

    /// Frees every chunk (the head's destructor releases the whole chain).
    ~Arena() { delete head; }

    /// Get piece of memory, without alignment.
    /// Returns a pointer to `size` uninitialized bytes; a new chunk is added when
    /// the head chunk cannot hold them.
    char* alloc(size_t size) {
        if (UNLIKELY(!head->can_fit(size))) add_chunk(size);

        char* res = head->pos;
        head->pos += size;
        // The padding after the allocation is unpoisoned too, so that callers may
        // use the overflow-tolerant memcpy variants on it.
        ASAN_UNPOISON_MEMORY_REGION(res, size + pad_right);
        return res;
    }

    /// Get piece of memory with alignment
    /// `alignment` must be a power of two (requirement of std::align).
    char* aligned_alloc(size_t size, size_t alignment) {
        assert(alignment != 0 && (alignment & (alignment - 1)) == 0);

        do {
            void* head_pos = head->pos;
            size_t space = head->end - head->pos;

            // std::align advances head_pos to the aligned address on success and
            // returns nullptr when size bytes do not fit into the remaining space.
            auto res = static_cast<char*>(std::align(alignment, size, head_pos, space));
            if (res) {
                head->pos = static_cast<char*>(head_pos);
                head->pos += size;
                ASAN_UNPOISON_MEMORY_REGION(res, size + pad_right);
                return res;
            }

            // Reserve room for the worst-case alignment gap and retry in the new chunk.
            add_chunk(size + alignment);
        } while (true);
    }

    /// Get uninitialized storage sized and aligned for a `T`. No constructor is run.
    template <typename T>
    T* alloc() {
        return reinterpret_cast<T*>(aligned_alloc(sizeof(T), alignof(T)));
    }

    /** Rollback just performed allocation.
      * Must pass size not more than was just allocated.
      * Return the resulting head pointer, so that the caller can assert that
      * the allocation it intended to roll back was indeed the last one.
      */
    void* rollback(size_t size) {
        // Rolling back more than the head chunk has handed out would move pos before begin.
        assert(size <= head->used());

        head->pos -= size;
        ASAN_POISON_MEMORY_REGION(head->pos, size + pad_right);
        return head->pos;
    }

    /** Begin or expand a contiguous range of memory.
      * 'range_start' is the start of range. If nullptr, a new range is
      * allocated.
      * If there is no space in the current chunk to expand the range,
      * the entire range is copied to a new, bigger memory chunk, and the value
      * of 'range_start' is updated.
      * If the optional 'start_alignment' is specified, the start of range is
      * kept aligned to this value.
      *
      * Returns a pointer to the first of the 'additional_bytes' new bytes.
      *
      * NOTE This method is usable only for the last allocation made on this
      * Arena. For earlier allocations, see 'realloc' method.
      */
    [[nodiscard]] char* alloc_continue(size_t additional_bytes, char const*& range_start,
                                       size_t start_alignment = 0) {
        if (!range_start) {
            // Start a new memory range.
            char* result = start_alignment ? aligned_alloc(additional_bytes, start_alignment)
                                           : alloc(additional_bytes);

            range_start = result;
            return result;
        }

        // Extend an existing memory range with 'additional_bytes'.

        // This method only works for extending the last allocation. For lack of
        // original size, check a weaker condition: that 'begin' is at least in
        // the current Chunk.
        assert(range_start >= head->begin && range_start < head->end);

        if (head->can_fit(additional_bytes)) {
            // The new size fits into the last chunk, so just alloc the
            // additional size. We can alloc without alignment here, because it
            // only applies to the start of the range, and we don't change it.
            return alloc(additional_bytes);
        }

        // New range doesn't fit into this chunk, will copy to a new one.
        //
        // Note: among other things, this method is used to provide a hack-ish
        // implementation of realloc over Arenas in ArenaAllocators. It wastes a
        // lot of memory -- quadratically so when we reach the linear allocation
        // threshold. This deficiency is intentionally left as is, and should be
        // solved not by complicating this method, but by rethinking the
        // approach to memory management for aggregate function states, so that
        // we can provide a proper realloc().
        const size_t existing_bytes = head->pos - range_start;
        const size_t new_bytes = existing_bytes + additional_bytes;
        const char* old_range = range_start;

        char* new_range =
                start_alignment ? aligned_alloc(new_bytes, start_alignment) : alloc(new_bytes);

        memcpy(new_range, old_range, existing_bytes);

        range_start = new_range;
        return new_range + existing_bytes;
    }

    /// Allocate `new_size` bytes and copy the old contents into them.
    /// At most `new_size` bytes are copied, so a shrinking call cannot write past
    /// the new allocation.
    /// NOTE Old memory region is wasted.
    [[nodiscard]] char* realloc(const char* old_data, size_t old_size, size_t new_size) {
        char* res = alloc(new_size);
        if (old_data) {
            memcpy(res, old_data, std::min(old_size, new_size));
            ASAN_POISON_MEMORY_REGION(old_data, old_size);
        }
        return res;
    }

    /// Same as realloc(), but the new region is aligned to `alignment`.
    [[nodiscard]] char* aligned_realloc(const char* old_data, size_t old_size, size_t new_size,
                                        size_t alignment) {
        char* res = aligned_alloc(new_size, alignment);
        if (old_data) {
            memcpy(res, old_data, std::min(old_size, new_size));
            ASAN_POISON_MEMORY_REGION(old_data, old_size);
        }
        return res;
    }

    /// Insert string without alignment.
    /// Copies `size` bytes from `data` into the arena and returns the copy.
    [[nodiscard]] const char* insert(const char* data, size_t size) {
        char* res = alloc(size);
        // Skip the copy for empty input: memcpy must not be given a null source.
        if (size != 0) memcpy(res, data, size);
        return res;
    }

    /// Same as insert(), but the copy is aligned to `alignment`.
    [[nodiscard]] const char* aligned_insert(const char* data, size_t size, size_t alignment) {
        char* res = aligned_alloc(size, alignment);
        if (size != 0) memcpy(res, data, size);
        return res;
    }

    /**
    * Delete all the chunks before the head and rewind the head so that its
    * buffer is reused from the start. Pointers obtained from the arena before
    * this call must no longer be used.
    *
    * Usually the head is the largest chunk in the arena, which makes repeated
    * fill/clear cycles converge to a single chunk:
    * 1. the first fill grows the arena through a chain of chunks of increasing size;
    * 2. clear() keeps only the newest (largest) chunk of that chain;
    * 3. the next fill of the same total size starts in that larger chunk, so it
    *    needs fewer new chunks, and the chunk kept by the following clear() is at
    *    least as large;
    * 4. once the kept chunk can hold the whole workload, no further chunks are
    *    allocated and the memory is fully reused.
    *
    * special case: once the head chunk reaches linear_growth_threshold (128M by
    * default), chunk sizes are no longer multiplied by growth_factor.
    * If alloc 4G memory, 128M each time, then only one chunk of roughly that size
    * will be reserved after clearing, and only that much can be reused when you
    * apply for 4G memory again.
    */
    void clear() {
        if (head->prev) {
            delete head->prev;
            head->prev = nullptr;
        }
        head->pos = head->begin;
        size_in_bytes = head->size();
        _used_size_no_head = 0;
    }

    /// Size of chunks in bytes.
    size_t size() const { return size_in_bytes; }

    /// Bytes handed out so far across all chunks (alignment gaps included).
    size_t used_size() const { return _used_size_no_head + head->used(); }

    /// Bytes that can still be allocated from the head chunk without adding a new one.
    size_t remaining_space_in_current_chunk() const { return head->remaining(); }
};

/// Shared-ownership handle to an Arena.
using ArenaPtr = std::shared_ptr<Arena>;
/// A list of shared Arena handles.
using Arenas = std::vector<ArenaPtr>;

} // namespace doris::vectorized

Coverage Report

Created: 2024-11-22 12:06

Line	Count	Source (jump to first uncovered line)
1		// Licensed to the Apache Software Foundation (ASF) under one
2		// or more contributor license agreements. See the NOTICE file
3		// distributed with this work for additional information
4		// regarding copyright ownership. The ASF licenses this file
5		// to you under the Apache License, Version 2.0 (the
6		// "License"); you may not use this file except in compliance
7		// with the License. You may obtain a copy of the License at
8		//
9		// http://www.apache.org/licenses/LICENSE-2.0
10		//
11		// Unless required by applicable law or agreed to in writing,
12		// software distributed under the License is distributed on an
13		// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14		// KIND, either express or implied. See the License for the
15		// specific language governing permissions and limitations
16		// under the License.
17		// This file is copied from
18		// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Arena.h
19		// and modified by Doris
20
21		#pragma once
22
23		#include <common/compiler_util.h>
24		#include <string.h>
25
26		#include <boost/noncopyable.hpp>
27		#include <memory>
28		#include <vector>
29		#if __has_include(<sanitizer/asan_interface.h>)
30		#include <sanitizer/asan_interface.h>
31		#endif
32		#include "gutil/dynamic_annotations.h"
33		#include "vec/common/allocator.h"
34		#include "vec/common/allocator_fwd.h"
35		#include "vec/common/memcpy_small.h"
36
37		namespace doris::vectorized {
38
39		/** Memory pool to append something. For example, short strings.
40		* Usage scenario:
41		* - put lot of strings inside pool, keep their addresses;
42		* - addresses remain valid during lifetime of pool;
43		* - at destruction of pool, all memory is freed;
44		* - memory is allocated and freed by large chunks;
45		* - freeing parts of data is not possible (but look at ArenaWithFreeLists if you need);
46		*/
47		class Arena : private boost::noncopyable {
48		private:
49		/// Padding allows to use 'memcpy_small_allow_read_write_overflow15' instead of 'memcpy'.
50		static constexpr size_t pad_right = 15;
51
52		/// Contiguous chunk of memory and pointer to free space inside it. Member of single-linked list.
53		struct alignas(16) Chunk : private Allocator<false> /// empty base optimization
54		{
55		char* begin = nullptr;
56		char* pos = nullptr;
57		char* end = nullptr; /// does not include padding.
58
59		Chunk* prev = nullptr;
60
61	21.6k	Chunk(size_t size_, Chunk* prev_) {
62	21.6k	begin = reinterpret_cast<char*>(Allocator<false>::alloc(size_));
63	21.6k	pos = begin;
64	21.6k	end = begin + size_ - pad_right;
65	21.6k	prev = prev_;
66
67	21.6k	ASAN_POISON_MEMORY_REGION(begin, size_);
68	21.6k	}
69
70	21.6k	~Chunk() {
71		/// We must unpoison the memory before returning to the allocator,
72		/// because the allocator might not have asan integration, and the
73		/// memory would stay poisoned forever. If the allocator supports
74		/// asan, it will correctly poison the memory by itself.
75	21.6k	ASAN_UNPOISON_MEMORY_REGION(begin, size());
76
77	21.6k	Allocator<false>::free(begin, size());
78
79	21.6k	if (prev) delete prev;
80	21.6k	}
81
82	474k	size_t size() const { return end + pad_right - begin; }
83	9	size_t remaining() const { return end - pos; }
84	194	size_t used() const { return pos - begin; }
85		};
86
87		size_t growth_factor;
88		size_t linear_growth_threshold;
89
90		/// Last contiguous chunk of memory.
91		Chunk* head = nullptr;
92		size_t size_in_bytes;
93		// The memory used by all chunks, excluding head.
94		size_t _used_size_no_head;
95
96	100	static size_t round_up_to_page_size(size_t s) { return (s + 4096 - 1) / 4096 * 4096; }
97
98		/// If chunks size is less than 'linear_growth_threshold', then use exponential growth, otherwise - linear growth
99		/// (to not allocate too much excessive memory).
100	100	size_t next_size(size_t min_next_size) const {
101	100	size_t size_after_grow = 0;
102
103	100	if (head->size() < linear_growth_threshold) {
104	98	size_after_grow = std::max(min_next_size, head->size() * growth_factor);
105	98	} else {
106		// alloc_continue() combined with linear growth results in quadratic
107		// behavior: we append the data by small amounts, and when it
108		// doesn't fit, we create a new chunk and copy all the previous data
109		// into it. The number of times we do this is directly proportional
110		// to the total size of data that is going to be serialized. To make
111		// the copying happen less often, round the next size up to the
112		// linear_growth_threshold.
113	2	size_after_grow =
114	2	((min_next_size + linear_growth_threshold - 1) / linear_growth_threshold) *
115	2	linear_growth_threshold;
116	2	}
117
118	100	assert(size_after_grow >= min_next_size);
119	0	return round_up_to_page_size(size_after_grow);
120	100	}
121
122		/// Add next contiguous chunk of memory with size not less than specified.
123	100	void NO_INLINE add_chunk(size_t min_size) {
124	100	_used_size_no_head += head->used();
125	100	head = new Chunk(next_size(min_size + pad_right), head);
126	100	size_in_bytes += head->size();
127	100	}
128
129		friend class ArenaAllocator;
130		template <size_t>
131		friend class AlignedArenaAllocator;
132
133		public:
134		Arena(size_t initial_size_ = 4096, size_t growth_factor_ = 2,
135		size_t linear_growth_threshold_ = 128 * 1024 * 1024)
136		: growth_factor(growth_factor_),
137		linear_growth_threshold(linear_growth_threshold_),
138		head(new Chunk(initial_size_, nullptr)),
139		size_in_bytes(head->size()),
140	21.5k	_used_size_no_head(0) {}
141
142	21.5k	~Arena() { delete head; }
143
144		/// Get piece of memory, without alignment.
145	182k	char* alloc(size_t size) {
146	182k	if (UNLIKELY(head->pos + size > head->end)) add_chunk(size);
147
148	182k	char* res = head->pos;
149	182k	head->pos += size;
150	182k	ASAN_UNPOISON_MEMORY_REGION(res, size + pad_right);
151	182k	return res;
152	182k	}
153
154		/// Get piece of memory with alignment
155	2	char* aligned_alloc(size_t size, size_t alignment) {
156	2	do {
157	2	void* head_pos = head->pos;
158	2	size_t space = head->end - head->pos;
159
160	2	auto res = static_cast<char*>(std::align(alignment, size, head_pos, space));
161	2	if (res) {
162	2	head->pos = static_cast<char*>(head_pos);
163	2	head->pos += size;
164	2	ASAN_UNPOISON_MEMORY_REGION(res, size + pad_right);
165	2	return res;
166	2	}
167
168	0	add_chunk(size + alignment);
169	0	} while (true);
170	2	}
171
172		template <typename T>
173		T* alloc() {
174		return reinterpret_cast<T*>(aligned_alloc(sizeof(T), alignof(T)));
175		}
176
177		/** Rollback just performed allocation.
178		* Must pass size not more that was just allocated.
179		* Return the resulting head pointer, so that the caller can assert that
180		* the allocation it intended to roll back was indeed the last one.
181		*/
182	0	void* rollback(size_t size) {
183	0	head->pos -= size;
184	0	ASAN_POISON_MEMORY_REGION(head->pos, size + pad_right);
185	0	return head->pos;
186	0	}
187
188		/** Begin or expand a contiguous range of memory.
189		* 'range_start' is the start of range. If nullptr, a new range is
190		* allocated.
191		* If there is no space in the current chunk to expand the range,
192		* the entire range is copied to a new, bigger memory chunk, and the value
193		* of 'range_start' is updated.
194		* If the optional 'start_alignment' is specified, the start of range is
195		* kept aligned to this value.
196		*
197		* NOTE This method is usable only for the last allocation made on this
198		* Arena. For earlier allocations, see 'realloc' method.
199		*/
200		[[nodiscard]] char* alloc_continue(size_t additional_bytes, char const*& range_start,
201	59	size_t start_alignment = 0) {
202	59	if (!range_start) {
203		// Start a new memory range.
204	13	char* result = start_alignment ? aligned_alloc(additional_bytes, start_alignment)
205	13	: alloc(additional_bytes);
206
207	13	range_start = result;
208	13	return result;
209	13	}
210
211		// Extend an existing memory range with 'additional_bytes'.
212
213		// This method only works for extending the last allocation. For lack of
214		// original size, check a weaker condition: that 'begin' is at least in
215		// the current Chunk.
216	46	assert(range_start >= head->begin && range_start < head->end);
217
218	46	if (head->pos + additional_bytes <= head->end) {
219		// The new size fits into the last chunk, so just alloc the
220		// additional size. We can alloc without alignment here, because it
221		// only applies to the start of the range, and we don't change it.
222	46	return alloc(additional_bytes);
223	46	}
224
225		// New range doesn't fit into this chunk, will copy to a new one.
226		//
227		// Note: among other things, this method is used to provide a hack-ish
228		// implementation of realloc over Arenas in ArenaAllocators. It wastes a
229		// lot of memory -- quadratically so when we reach the linear allocation
230		// threshold. This deficiency is intentionally left as is, and should be
231		// solved not by complicating this method, but by rethinking the
232		// approach to memory management for aggregate function states, so that
233		// we can provide a proper realloc().
234	0	const size_t existing_bytes = head->pos - range_start;
235	0	const size_t new_bytes = existing_bytes + additional_bytes;
236	0	const char* old_range = range_start;
237
238	0	char* new_range =
239	0	start_alignment ? aligned_alloc(new_bytes, start_alignment) : alloc(new_bytes);
240
241	0	memcpy(new_range, old_range, existing_bytes);
242
243	0	range_start = new_range;
244	0	return new_range + existing_bytes;
245	46	}
246
247		/// NOTE Old memory region is wasted.
248	0	[[nodiscard]] char* realloc(const char* old_data, size_t old_size, size_t new_size) {
249	0	char* res = alloc(new_size);
250	0	if (old_data) {
251	0	memcpy(res, old_data, old_size);
252	0	ASAN_POISON_MEMORY_REGION(old_data, old_size);
253	0	}
254	0	return res;
255	0	}
256
257		[[nodiscard]] char* aligned_realloc(const char* old_data, size_t old_size, size_t new_size,
258	0	size_t alignment) {
259	0	char* res = aligned_alloc(new_size, alignment);
260	0	if (old_data) {
261	0	memcpy(res, old_data, old_size);
262	0	ASAN_POISON_MEMORY_REGION(old_data, old_size);
263	0	}
264	0	return res;
265	0	}
266
267		/// Insert string without alignment.
268	25	[[nodiscard]] const char* insert(const char* data, size_t size) {
269	25	char* res = alloc(size);
270	25	memcpy(res, data, size);
271	25	return res;
272	25	}
273
274	0	[[nodiscard]] const char* aligned_insert(const char* data, size_t size, size_t alignment) {
275	0	char* res = aligned_alloc(size, alignment);
276	0	memcpy(res, data, size);
277	0	return res;
278	0	}
279
280		/**
281		* Delete all the chunks before the head, usually the head is the largest chunk in the arena.
282		* considering the scenario of memory reuse:
283		* 1. first time, use arena alloc 64K memory, 4K each time, at this time, there are 4 chunks of 4k 8k 16k 32k in arena.
284		* 2. then, clear arena, only one 32k chunk left in the arena.
285		* 3. second time, same alloc 64K memory, there are 4 chunks of 4k 8k 16k 32k in arena.
286		* 4. then, clear arena, only one 64k chunk left in the arena.
287		* 5. third time, same alloc 64K memory, there is still only one 64K chunk in the arena, and the memory is fully reused.
288		*
289		* special case: if the chunk is larger than 128M, it will no longer be expanded by a multiple of 2.
290		* If alloc 4G memory, 128M each time, then only one 128M chunk will be reserved after clearing,
291		* and only 128M can be reused when you apply for 4G memory again.
292		*/
293	409k	void clear() {
294	409k	if (head->prev) {
295	0	delete head->prev;
296	0	head->prev = nullptr;
297	0	}
298	409k	head->pos = head->begin;
299	409k	size_in_bytes = head->size();
300	409k	_used_size_no_head = 0;
301	409k	}
302
303		/// Size of chunks in bytes.
304	11	size_t size() const { return size_in_bytes; }
305
306	94	size_t used_size() const { return _used_size_no_head + head->used(); }
307
308	9	size_t remaining_space_in_current_chunk() const { return head->remaining(); }
309		};
310
311		using ArenaPtr = std::shared_ptr<Arena>;
312		using Arenas = std::vector<ArenaPtr>;
313
314		} // namespace doris::vectorized