be/src/exec/common/agg_utils.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <variant> |
21 | | #include <vector> |
22 | | |
23 | | #include "core/arena.h" |
24 | | #include "exec/common/hash_table/hash_map_context.h" |
25 | | #include "exec/common/hash_table/hash_map_util.h" |
26 | | #include "exec/common/hash_table/ph_hash_map.h" |
27 | | #include "exec/common/hash_table/string_hash_map.h" |
28 | | |
29 | | namespace doris { |
30 | | |
31 | | template <typename T> |
32 | | using AggData = PHHashMap<T, AggregateDataPtr, HashCRC32<T>>; |
33 | | template <typename T> |
34 | | using AggDataNullable = DataWithNullKey<AggData<T>>; |
35 | | |
36 | | using AggregatedDataWithoutKey = AggregateDataPtr; |
37 | | using AggregatedDataWithStringKey = PHHashMap<StringRef, AggregateDataPtr>; |
38 | | using AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>; |
39 | | |
40 | | using AggregatedDataWithUInt32KeyPhase2 = |
41 | | PHHashMap<UInt32, AggregateDataPtr, HashMixWrapper<UInt32>>; |
42 | | using AggregatedDataWithUInt64KeyPhase2 = |
43 | | PHHashMap<UInt64, AggregateDataPtr, HashMixWrapper<UInt64>>; |
44 | | |
45 | | using AggregatedDataWithNullableUInt32KeyPhase2 = |
46 | | DataWithNullKey<AggregatedDataWithUInt32KeyPhase2>; |
47 | | using AggregatedDataWithNullableUInt64KeyPhase2 = |
48 | | DataWithNullKey<AggregatedDataWithUInt64KeyPhase2>; |
49 | | using AggregatedDataWithNullableShortStringKey = DataWithNullKey<AggregatedDataWithShortStringKey>; |
50 | | |
51 | | using AggregatedMethodVariants = std::variant< |
52 | | std::monostate, MethodSerialized<AggregatedDataWithStringKey>, |
53 | | MethodOneNumber<UInt8, AggData<UInt8>>, MethodOneNumber<UInt16, AggData<UInt16>>, |
54 | | MethodOneNumber<UInt32, AggData<UInt32>>, MethodOneNumber<UInt64, AggData<UInt64>>, |
55 | | MethodStringNoCache<AggregatedDataWithShortStringKey>, |
56 | | MethodOneNumber<UInt128, AggData<UInt128>>, MethodOneNumber<UInt256, AggData<UInt256>>, |
57 | | MethodOneNumber<UInt32, AggregatedDataWithUInt32KeyPhase2>, |
58 | | MethodOneNumber<UInt64, AggregatedDataWithUInt64KeyPhase2>, |
59 | | MethodSingleNullableColumn<MethodOneNumber<UInt8, AggDataNullable<UInt8>>>, |
60 | | MethodSingleNullableColumn<MethodOneNumber<UInt16, AggDataNullable<UInt16>>>, |
61 | | MethodSingleNullableColumn<MethodOneNumber<UInt32, AggDataNullable<UInt32>>>, |
62 | | MethodSingleNullableColumn<MethodOneNumber<UInt64, AggDataNullable<UInt64>>>, |
63 | | MethodSingleNullableColumn< |
64 | | MethodOneNumber<UInt32, AggregatedDataWithNullableUInt32KeyPhase2>>, |
65 | | MethodSingleNullableColumn< |
66 | | MethodOneNumber<UInt64, AggregatedDataWithNullableUInt64KeyPhase2>>, |
67 | | MethodSingleNullableColumn<MethodOneNumber<UInt128, AggDataNullable<UInt128>>>, |
68 | | MethodSingleNullableColumn<MethodOneNumber<UInt256, AggDataNullable<UInt256>>>, |
69 | | MethodSingleNullableColumn<MethodStringNoCache<AggregatedDataWithNullableShortStringKey>>, |
70 | | MethodKeysFixed<AggData<UInt64>>, MethodKeysFixed<AggData<UInt72>>, |
71 | | MethodKeysFixed<AggData<UInt96>>, MethodKeysFixed<AggData<UInt104>>, |
72 | | MethodKeysFixed<AggData<UInt128>>, MethodKeysFixed<AggData<UInt136>>, |
73 | | MethodKeysFixed<AggData<UInt256>>>; |
74 | | |
75 | | struct AggregatedDataVariants |
76 | | : public DataVariants<AggregatedMethodVariants, MethodSingleNullableColumn, MethodOneNumber, |
77 | | DataWithNullKey> { |
78 | | AggregatedDataWithoutKey without_key = nullptr; |
79 | | |
80 | 78.4k | void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) { |
81 | 78.4k | bool nullable = data_types.size() == 1 && data_types[0]->is_nullable(); |
82 | | |
83 | 78.4k | switch (type) { |
84 | 1 | case HashKeyType::without_key: |
85 | 1 | break; |
86 | 15.8k | case HashKeyType::serialized: |
87 | 15.8k | method_variant.emplace<MethodSerialized<AggregatedDataWithStringKey>>(); |
88 | 15.8k | break; |
89 | 4.71k | case HashKeyType::int8_key: |
90 | 4.71k | emplace_single<UInt8, AggData<UInt8>>(nullable); |
91 | 4.71k | break; |
92 | 1.67k | case HashKeyType::int16_key: |
93 | 1.67k | emplace_single<UInt16, AggData<UInt16>>(nullable); |
94 | 1.67k | break; |
95 | 5.67k | case HashKeyType::int32_key: |
96 | 5.67k | emplace_single<UInt32, AggData<UInt32>>(nullable); |
97 | 5.67k | break; |
98 | 12.6k | case HashKeyType::int32_key_phase2: |
99 | 12.6k | emplace_single<UInt32, AggregatedDataWithUInt32KeyPhase2>(nullable); |
100 | 12.6k | break; |
101 | 4.37k | case HashKeyType::int64_key: |
102 | 4.37k | emplace_single<UInt64, AggData<UInt64>>(nullable); |
103 | 4.37k | break; |
104 | 9.68k | case HashKeyType::int64_key_phase2: |
105 | 9.68k | emplace_single<UInt64, AggregatedDataWithUInt64KeyPhase2>(nullable); |
106 | 9.68k | break; |
107 | 664 | case HashKeyType::int128_key: |
108 | 664 | emplace_single<UInt128, AggData<UInt128>>(nullable); |
109 | 664 | break; |
110 | 15 | case HashKeyType::int256_key: |
111 | 15 | emplace_single<UInt256, AggData<UInt256>>(nullable); |
112 | 15 | break; |
113 | 7.11k | case HashKeyType::string_key: |
114 | 7.11k | if (nullable) { |
115 | 5.89k | method_variant.emplace<MethodSingleNullableColumn< |
116 | 5.89k | MethodStringNoCache<AggregatedDataWithNullableShortStringKey>>>(); |
117 | 5.89k | } else { |
118 | 1.21k | method_variant.emplace<MethodStringNoCache<AggregatedDataWithShortStringKey>>(); |
119 | 1.21k | } |
120 | 7.11k | break; |
121 | 1.33k | case HashKeyType::fixed64: |
122 | 1.33k | method_variant.emplace<MethodKeysFixed<AggData<UInt64>>>(get_key_sizes(data_types)); |
123 | 1.33k | break; |
124 | 1.55k | case HashKeyType::fixed72: |
125 | 1.55k | method_variant.emplace<MethodKeysFixed<AggData<UInt72>>>(get_key_sizes(data_types)); |
126 | 1.55k | break; |
127 | 760 | case HashKeyType::fixed96: |
128 | 760 | method_variant.emplace<MethodKeysFixed<AggData<UInt96>>>(get_key_sizes(data_types)); |
129 | 760 | break; |
130 | 1.50k | case HashKeyType::fixed104: |
131 | 1.50k | method_variant.emplace<MethodKeysFixed<AggData<UInt104>>>(get_key_sizes(data_types)); |
132 | 1.50k | break; |
133 | 153 | case HashKeyType::fixed128: |
134 | 153 | method_variant.emplace<MethodKeysFixed<AggData<UInt128>>>(get_key_sizes(data_types)); |
135 | 153 | break; |
136 | 2.68k | case HashKeyType::fixed136: |
137 | 2.68k | method_variant.emplace<MethodKeysFixed<AggData<UInt136>>>(get_key_sizes(data_types)); |
138 | 2.68k | break; |
139 | 7.98k | case HashKeyType::fixed256: |
140 | 7.98k | method_variant.emplace<MethodKeysFixed<AggData<UInt256>>>(get_key_sizes(data_types)); |
141 | 7.98k | break; |
142 | 1 | default: |
143 | 1 | throw Exception(ErrorCode::INTERNAL_ERROR, |
144 | 1 | "AggregatedDataVariants meet invalid key type, type={}", type); |
145 | 78.4k | } |
146 | 78.4k | } |
147 | | }; |
148 | | |
149 | | using AggregatedDataVariantsUPtr = std::unique_ptr<AggregatedDataVariants>; |
150 | | using ArenaUPtr = std::unique_ptr<Arena>; |
151 | | |
152 | | struct AggregateDataContainer { |
153 | | public: |
154 | | AggregateDataContainer(size_t size_of_key, size_t size_of_aggregate_states) |
155 | 84.5k | : _size_of_key(size_of_key), _size_of_aggregate_states(size_of_aggregate_states) {} |
156 | | |
157 | 98.2k | int64_t memory_usage() const { return _arena_pool.size(); } |
158 | | |
159 | | template <typename KeyType> |
160 | 4.76M | AggregateDataPtr append_data(const KeyType& key) { |
161 | 4.76M | DCHECK_EQ(sizeof(KeyType), _size_of_key); |
162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero |
163 | 4.76M | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { |
164 | 33.9k | _expand(); |
165 | 33.9k | } |
166 | | |
167 | 4.76M | *reinterpret_cast<KeyType*>(_current_keys) = key; |
168 | 4.76M | auto* aggregate_data = _current_agg_data; |
169 | 4.76M | ++_total_count; |
170 | 4.76M | ++_index_in_sub_container; |
171 | 4.76M | _current_agg_data += _size_of_aggregate_states; |
172 | 4.76M | _current_keys += _size_of_key; |
173 | 4.76M | return aggregate_data; |
174 | 4.76M | } _ZN5doris22AggregateDataContainer11append_dataINS_9StringRefEEEPcRKT_ Line | Count | Source | 160 | 252k | AggregateDataPtr append_data(const KeyType& key) { | 161 | 252k | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 163 | 252k | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 164 | 6.86k | _expand(); | 165 | 6.86k | } | 166 | | | 167 | 252k | *reinterpret_cast<KeyType*>(_current_keys) = key; | 168 | 252k | auto* aggregate_data = _current_agg_data; | 169 | 252k | ++_total_count; | 170 | 252k | ++_index_in_sub_container; | 171 | 252k | _current_agg_data += _size_of_aggregate_states; | 172 | 252k | _current_keys += _size_of_key; | 173 | 252k | return aggregate_data; | 174 | 252k | } |
_ZN5doris22AggregateDataContainer11append_dataIhEEPcRKT_ Line | Count | Source | 160 | 5.54k | AggregateDataPtr append_data(const KeyType& key) { | 161 | 5.54k | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 163 | 5.54k | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 164 | 2.60k | _expand(); | 165 | 2.60k | } | 166 | | | 167 | 5.54k | *reinterpret_cast<KeyType*>(_current_keys) = key; | 168 | 5.54k | auto* aggregate_data = _current_agg_data; | 169 | 5.54k | ++_total_count; | 170 | 5.54k | ++_index_in_sub_container; | 171 | 5.54k | _current_agg_data += _size_of_aggregate_states; | 172 | 5.54k | _current_keys += _size_of_key; | 173 | 5.54k | return aggregate_data; | 174 | 5.54k | } |
_ZN5doris22AggregateDataContainer11append_dataItEEPcRKT_ Line | Count | Source | 160 | 3.92k | AggregateDataPtr append_data(const KeyType& key) { | 161 | 3.92k | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 163 | 3.92k | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 164 | 558 | _expand(); | 165 | 558 | } | 166 | | | 167 | 3.92k | *reinterpret_cast<KeyType*>(_current_keys) = key; | 168 | 3.92k | auto* aggregate_data = _current_agg_data; | 169 | 3.92k | ++_total_count; | 170 | 3.92k | ++_index_in_sub_container; | 171 | 3.92k | _current_agg_data += _size_of_aggregate_states; | 172 | 3.92k | _current_keys += _size_of_key; | 173 | 3.92k | return aggregate_data; | 174 | 3.92k | } |
_ZN5doris22AggregateDataContainer11append_dataIjEEPcRKT_ Line | Count | Source | 160 | 3.81M | AggregateDataPtr append_data(const KeyType& key) { | 161 | 3.81M | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 163 | 3.81M | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 164 | 12.1k | _expand(); | 165 | 12.1k | } | 166 | | | 167 | 3.81M | *reinterpret_cast<KeyType*>(_current_keys) = key; | 168 | 3.81M | auto* aggregate_data = _current_agg_data; | 169 | 3.81M | ++_total_count; | 170 | 3.81M | ++_index_in_sub_container; | 171 | 3.81M | _current_agg_data += _size_of_aggregate_states; | 172 | 3.81M | _current_keys += _size_of_key; | 173 | 3.81M | return aggregate_data; | 174 | 3.81M | } |
_ZN5doris22AggregateDataContainer11append_dataImEEPcRKT_ Line | Count | Source | 160 | 645k | AggregateDataPtr append_data(const KeyType& key) { | 161 | 645k | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 163 | 645k | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 164 | 7.85k | _expand(); | 165 | 7.85k | } | 166 | | | 167 | 645k | *reinterpret_cast<KeyType*>(_current_keys) = key; | 168 | 645k | auto* aggregate_data = _current_agg_data; | 169 | 645k | ++_total_count; | 170 | 645k | ++_index_in_sub_container; | 171 | 645k | _current_agg_data += _size_of_aggregate_states; | 172 | 645k | _current_keys += _size_of_key; | 173 | 645k | return aggregate_data; | 174 | 645k | } |
_ZN5doris22AggregateDataContainer11append_dataIN4wide7integerILm128EjEEEEPcRKT_ Line | Count | Source | 160 | 26.4k | AggregateDataPtr append_data(const KeyType& key) { | 161 | 26.4k | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 163 | 26.4k | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 164 | 770 | _expand(); | 165 | 770 | } | 166 | | | 167 | 26.4k | *reinterpret_cast<KeyType*>(_current_keys) = key; | 168 | 26.4k | auto* aggregate_data = _current_agg_data; | 169 | 26.4k | ++_total_count; | 170 | 26.4k | ++_index_in_sub_container; | 171 | 26.4k | _current_agg_data += _size_of_aggregate_states; | 172 | 26.4k | _current_keys += _size_of_key; | 173 | 26.4k | return aggregate_data; | 174 | 26.4k | } |
_ZN5doris22AggregateDataContainer11append_dataIN4wide7integerILm256EjEEEEPcRKT_ Line | Count | Source | 160 | 4.31k | AggregateDataPtr append_data(const KeyType& key) { | 161 | 4.31k | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 163 | 4.31k | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 164 | 684 | _expand(); | 165 | 684 | } | 166 | | | 167 | 4.31k | *reinterpret_cast<KeyType*>(_current_keys) = key; | 168 | 4.31k | auto* aggregate_data = _current_agg_data; | 169 | 4.31k | ++_total_count; | 170 | 4.31k | ++_index_in_sub_container; | 171 | 4.31k | _current_agg_data += _size_of_aggregate_states; | 172 | 4.31k | _current_keys += _size_of_key; | 173 | 4.31k | return aggregate_data; | 174 | 4.31k | } |
_ZN5doris22AggregateDataContainer11append_dataINS_6UInt72EEEPcRKT_ Line | Count | Source | 160 | 1.05k | AggregateDataPtr append_data(const KeyType& key) { | 161 | 1.05k | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 163 | 1.05k | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 164 | 552 | _expand(); | 165 | 552 | } | 166 | | | 167 | 1.05k | *reinterpret_cast<KeyType*>(_current_keys) = key; | 168 | 1.05k | auto* aggregate_data = _current_agg_data; | 169 | 1.05k | ++_total_count; | 170 | 1.05k | ++_index_in_sub_container; | 171 | 1.05k | _current_agg_data += _size_of_aggregate_states; | 172 | 1.05k | _current_keys += _size_of_key; | 173 | 1.05k | return aggregate_data; | 174 | 1.05k | } |
_ZN5doris22AggregateDataContainer11append_dataINS_6UInt96EEEPcRKT_ Line | Count | Source | 160 | 7.75k | AggregateDataPtr append_data(const KeyType& key) { | 161 | 7.75k | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 163 | 7.75k | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 164 | 603 | _expand(); | 165 | 603 | } | 166 | | | 167 | 7.75k | *reinterpret_cast<KeyType*>(_current_keys) = key; | 168 | 7.75k | auto* aggregate_data = _current_agg_data; | 169 | 7.75k | ++_total_count; | 170 | 7.75k | ++_index_in_sub_container; | 171 | 7.75k | _current_agg_data += _size_of_aggregate_states; | 172 | 7.75k | _current_keys += _size_of_key; | 173 | 7.75k | return aggregate_data; | 174 | 7.75k | } |
_ZN5doris22AggregateDataContainer11append_dataINS_7UInt104EEEPcRKT_ Line | Count | Source | 160 | 4.92k | AggregateDataPtr append_data(const KeyType& key) { | 161 | 4.92k | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 163 | 4.92k | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 164 | 429 | _expand(); | 165 | 429 | } | 166 | | | 167 | 4.92k | *reinterpret_cast<KeyType*>(_current_keys) = key; | 168 | 4.92k | auto* aggregate_data = _current_agg_data; | 169 | 4.92k | ++_total_count; | 170 | 4.92k | ++_index_in_sub_container; | 171 | 4.92k | _current_agg_data += _size_of_aggregate_states; | 172 | 4.92k | _current_keys += _size_of_key; | 173 | 4.92k | return aggregate_data; | 174 | 4.92k | } |
_ZN5doris22AggregateDataContainer11append_dataINS_7UInt136EEEPcRKT_ Line | Count | Source | 160 | 2.03k | AggregateDataPtr append_data(const KeyType& key) { | 161 | 2.03k | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 162 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 163 | 2.03k | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 164 | 933 | _expand(); | 165 | 933 | } | 166 | | | 167 | 2.03k | *reinterpret_cast<KeyType*>(_current_keys) = key; | 168 | 2.03k | auto* aggregate_data = _current_agg_data; | 169 | 2.03k | ++_total_count; | 170 | 2.03k | ++_index_in_sub_container; | 171 | 2.03k | _current_agg_data += _size_of_aggregate_states; | 172 | 2.03k | _current_keys += _size_of_key; | 173 | 2.03k | return aggregate_data; | 174 | 2.03k | } |
|
175 | | |
176 | | template <typename Derived, bool IsConst> |
177 | | class IteratorBase { |
178 | | using Container = |
179 | | std::conditional_t<IsConst, const AggregateDataContainer, AggregateDataContainer>; |
180 | | |
181 | | Container* container = nullptr; |
182 | | uint32_t index; |
183 | | uint32_t sub_container_index; |
184 | | uint32_t index_in_sub_container; |
185 | | |
186 | | friend class HashTable; |
187 | | |
188 | | public: |
189 | 84.5k | IteratorBase() = default; |
190 | | IteratorBase(Container* container_, uint32_t index_) |
191 | 5.11M | : container(container_), index(index_) { |
192 | 5.11M | sub_container_index = index / SUB_CONTAINER_CAPACITY; |
193 | 5.11M | index_in_sub_container = index - sub_container_index * SUB_CONTAINER_CAPACITY; |
194 | 5.11M | } |
195 | | |
196 | 79.4k | bool operator==(const IteratorBase& rhs) const { return index == rhs.index; } |
197 | 4.95M | bool operator!=(const IteratorBase& rhs) const { return index != rhs.index; } |
198 | | |
199 | 4.87M | Derived& operator++() { |
200 | 4.87M | index++; |
201 | 4.87M | index_in_sub_container++; |
202 | 4.87M | if (index_in_sub_container == SUB_CONTAINER_CAPACITY) { |
203 | 375 | index_in_sub_container = 0; |
204 | 375 | sub_container_index++; |
205 | 375 | } |
206 | 4.87M | return static_cast<Derived&>(*this); |
207 | 4.87M | } |
208 | | |
209 | | template <typename KeyType> |
210 | 4.86M | KeyType get_key() { |
211 | 4.86M | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); |
212 | 4.86M | return ((KeyType*)(container->_key_containers[sub_container_index])) |
213 | 4.86M | [index_in_sub_container]; |
214 | 4.86M | } _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_9StringRefEEET_v Line | Count | Source | 210 | 390k | KeyType get_key() { | 211 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 212 | 390k | return ((KeyType*)(container->_key_containers[sub_container_index])) | 213 | 390k | [index_in_sub_container]; | 214 | 390k | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIhEET_v Line | Count | Source | 210 | 5.54k | KeyType get_key() { | 211 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 212 | 5.54k | return ((KeyType*)(container->_key_containers[sub_container_index])) | 213 | 5.54k | [index_in_sub_container]; | 214 | 5.54k | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyItEET_v Line | Count | Source | 210 | 3.92k | KeyType get_key() { | 211 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 212 | 3.92k | return ((KeyType*)(container->_key_containers[sub_container_index])) | 213 | 3.92k | [index_in_sub_container]; | 214 | 3.92k | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIjEET_v Line | Count | Source | 210 | 3.76M | KeyType get_key() { | 211 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 212 | 3.76M | return ((KeyType*)(container->_key_containers[sub_container_index])) | 213 | 3.76M | [index_in_sub_container]; | 214 | 3.76M | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyImEET_v Line | Count | Source | 210 | 659k | KeyType get_key() { | 211 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 212 | 659k | return ((KeyType*)(container->_key_containers[sub_container_index])) | 213 | 659k | [index_in_sub_container]; | 214 | 659k | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIN4wide7integerILm128EjEEEET_v Line | Count | Source | 210 | 26.9k | KeyType get_key() { | 211 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 212 | 26.9k | return ((KeyType*)(container->_key_containers[sub_container_index])) | 213 | 26.9k | [index_in_sub_container]; | 214 | 26.9k | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIN4wide7integerILm256EjEEEET_v Line | Count | Source | 210 | 6.09k | KeyType get_key() { | 211 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 212 | 6.09k | return ((KeyType*)(container->_key_containers[sub_container_index])) | 213 | 6.09k | [index_in_sub_container]; | 214 | 6.09k | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_6UInt72EEET_v Line | Count | Source | 210 | 1.05k | KeyType get_key() { | 211 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 212 | 1.05k | return ((KeyType*)(container->_key_containers[sub_container_index])) | 213 | 1.05k | [index_in_sub_container]; | 214 | 1.05k | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_6UInt96EEET_v Line | Count | Source | 210 | 7.67k | KeyType get_key() { | 211 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 212 | 7.67k | return ((KeyType*)(container->_key_containers[sub_container_index])) | 213 | 7.67k | [index_in_sub_container]; | 214 | 7.67k | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_7UInt104EEET_v Line | Count | Source | 210 | 4.93k | KeyType get_key() { | 211 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 212 | 4.93k | return ((KeyType*)(container->_key_containers[sub_container_index])) | 213 | 4.93k | [index_in_sub_container]; | 214 | 4.93k | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_7UInt136EEET_v Line | Count | Source | 210 | 2.03k | KeyType get_key() { | 211 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 212 | 2.03k | return ((KeyType*)(container->_key_containers[sub_container_index])) | 213 | 2.03k | [index_in_sub_container]; | 214 | 2.03k | } |
|
215 | | |
216 | 4.70M | AggregateDataPtr get_aggregate_data() { |
217 | 4.70M | return &(container->_value_containers[sub_container_index] |
218 | 4.70M | [container->_size_of_aggregate_states * |
219 | 4.70M | index_in_sub_container]); |
220 | 4.70M | } |
221 | | }; |
222 | | |
223 | | class Iterator : public IteratorBase<Iterator, false> { |
224 | | public: |
225 | | using IteratorBase<Iterator, false>::IteratorBase; |
226 | | }; |
227 | | |
228 | | class ConstIterator : public IteratorBase<ConstIterator, true> { |
229 | | public: |
230 | | using IteratorBase<ConstIterator, true>::IteratorBase; |
231 | | }; |
232 | | |
233 | | ConstIterator begin() const { return {this, 0}; } |
234 | | |
235 | | ConstIterator cbegin() const { return begin(); } |
236 | | |
237 | 80.8k | Iterator begin() { return {this, 0}; } |
238 | | |
239 | | ConstIterator end() const { return {this, _total_count}; } |
240 | | ConstIterator cend() const { return end(); } |
241 | 5.03M | Iterator end() { return {this, _total_count}; } |
242 | | |
243 | 2.07k | [[nodiscard]] uint32_t total_count() const { return _total_count; } |
244 | | |
245 | 4.04k | size_t estimate_memory(size_t rows) const { |
246 | 4.04k | bool need_to_expand = false; |
247 | 4.04k | if (_total_count == 0) { |
248 | 1 | need_to_expand = true; |
249 | 4.04k | } else if ((_index_in_sub_container + rows) > SUB_CONTAINER_CAPACITY) { |
250 | 2 | need_to_expand = true; |
251 | 2 | rows -= (SUB_CONTAINER_CAPACITY - _index_in_sub_container); |
252 | 2 | } |
253 | | |
254 | 4.04k | if (!need_to_expand) { |
255 | 4.04k | return 0; |
256 | 4.04k | } |
257 | | |
258 | 3 | size_t count = (rows + SUB_CONTAINER_CAPACITY - 1) / SUB_CONTAINER_CAPACITY; |
259 | 3 | size_t size = _size_of_key * SUB_CONTAINER_CAPACITY; |
260 | 3 | size += _size_of_aggregate_states * SUB_CONTAINER_CAPACITY; |
261 | 3 | size *= count; |
262 | 3 | return size; |
263 | 4.04k | } |
264 | | |
265 | 86.7k | void init_once() { |
266 | 86.7k | if (_inited) { |
267 | 6.19k | return; |
268 | 6.19k | } |
269 | 80.5k | _inited = true; |
270 | 80.5k | iterator = begin(); |
271 | 80.5k | } |
272 | | Iterator iterator; |
273 | | |
274 | | private: |
275 | 33.9k | void _expand() { |
276 | 33.9k | _index_in_sub_container = 0; |
277 | 33.9k | _current_keys = nullptr; |
278 | 33.9k | _current_agg_data = nullptr; |
279 | 33.9k | try { |
280 | 33.9k | _current_keys = _arena_pool.alloc(_size_of_key * SUB_CONTAINER_CAPACITY); |
281 | 33.9k | _key_containers.emplace_back(_current_keys); |
282 | | |
283 | 33.9k | _current_agg_data = (AggregateDataPtr)_arena_pool.alloc(_size_of_aggregate_states * |
284 | 33.9k | SUB_CONTAINER_CAPACITY); |
285 | 33.9k | _value_containers.emplace_back(_current_agg_data); |
286 | 33.9k | } catch (...) { |
287 | 0 | if (_current_keys) { |
288 | 0 | _key_containers.pop_back(); |
289 | 0 | _current_keys = nullptr; |
290 | 0 | } |
291 | 0 | if (_current_agg_data) { |
292 | 0 | _value_containers.pop_back(); |
293 | 0 | _current_agg_data = nullptr; |
294 | 0 | } |
295 | 0 | throw; |
296 | 0 | } |
297 | 33.9k | } |
298 | | |
299 | | static constexpr uint32_t SUB_CONTAINER_CAPACITY = 8192; |
300 | | Arena _arena_pool; |
301 | | std::vector<char*> _key_containers; |
302 | | std::vector<AggregateDataPtr> _value_containers; |
303 | | AggregateDataPtr _current_agg_data = nullptr; |
304 | | char* _current_keys = nullptr; |
305 | | size_t _size_of_key {}; |
306 | | size_t _size_of_aggregate_states {}; |
307 | | uint32_t _index_in_sub_container {}; |
308 | | uint32_t _total_count {}; |
309 | | bool _inited = false; |
310 | | }; |
311 | | } // namespace doris |