be/src/exec/common/agg_utils.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <variant> |
21 | | #include <vector> |
22 | | |
23 | | #include "core/arena.h" |
24 | | #include "exec/common/hash_table/hash_map_context.h" |
25 | | #include "exec/common/hash_table/hash_map_util.h" |
26 | | #include "exec/common/hash_table/ph_hash_map.h" |
27 | | #include "exec/common/hash_table/string_hash_map.h" |
28 | | |
29 | | namespace doris { |
30 | | |
31 | | template <typename T> |
32 | | using AggData = PHHashMap<T, AggregateDataPtr, HashCRC32<T>>; |
33 | | template <typename T> |
34 | | using AggDataNullable = DataWithNullKey<AggData<T>>; |
35 | | |
36 | | using AggregatedDataWithoutKey = AggregateDataPtr; |
37 | | using AggregatedDataWithStringKey = PHHashMap<StringRef, AggregateDataPtr>; |
38 | | using AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>; |
39 | | |
40 | | using AggregatedDataWithUInt32KeyPhase2 = |
41 | | PHHashMap<UInt32, AggregateDataPtr, HashMixWrapper<UInt32>>; |
42 | | using AggregatedDataWithUInt64KeyPhase2 = |
43 | | PHHashMap<UInt64, AggregateDataPtr, HashMixWrapper<UInt64>>; |
44 | | |
45 | | using AggregatedDataWithNullableUInt32KeyPhase2 = |
46 | | DataWithNullKey<AggregatedDataWithUInt32KeyPhase2>; |
47 | | using AggregatedDataWithNullableUInt64KeyPhase2 = |
48 | | DataWithNullKey<AggregatedDataWithUInt64KeyPhase2>; |
49 | | using AggregatedDataWithNullableShortStringKey = DataWithNullKey<AggregatedDataWithShortStringKey>; |
50 | | using AggregatedDataWithNullableStringKey = DataWithNullKey<AggregatedDataWithStringKey>; |
51 | | |
52 | | /// Parameterized method variant for aggregation hash tables. |
53 | | /// StringData / NullableStringData control which hash map is used for string keys: |
54 | | /// - AggregatedDataVariants uses StringHashMap (AggregatedDataWithShortStringKey) |
55 | | /// - BucketedAggDataVariants uses PHHashMap<StringRef> (AggregatedDataWithStringKey) |
56 | | /// to avoid StringHashMap's sub-table complexity and unify the emplace interface. |
57 | | template <typename StringData, typename NullableStringData> |
58 | | using AggMethodVariantsBase = std::variant< |
59 | | std::monostate, MethodSerialized<AggregatedDataWithStringKey>, |
60 | | MethodOneNumber<UInt8, AggData<UInt8>>, MethodOneNumber<UInt16, AggData<UInt16>>, |
61 | | MethodOneNumber<UInt32, AggData<UInt32>>, MethodOneNumber<UInt64, AggData<UInt64>>, |
62 | | MethodStringNoCache<StringData>, MethodOneNumber<UInt128, AggData<UInt128>>, |
63 | | MethodOneNumber<UInt256, AggData<UInt256>>, |
64 | | MethodOneNumber<UInt32, AggregatedDataWithUInt32KeyPhase2>, |
65 | | MethodOneNumber<UInt64, AggregatedDataWithUInt64KeyPhase2>, |
66 | | MethodSingleNullableColumn<MethodOneNumber<UInt8, AggDataNullable<UInt8>>>, |
67 | | MethodSingleNullableColumn<MethodOneNumber<UInt16, AggDataNullable<UInt16>>>, |
68 | | MethodSingleNullableColumn<MethodOneNumber<UInt32, AggDataNullable<UInt32>>>, |
69 | | MethodSingleNullableColumn<MethodOneNumber<UInt64, AggDataNullable<UInt64>>>, |
70 | | MethodSingleNullableColumn< |
71 | | MethodOneNumber<UInt32, AggregatedDataWithNullableUInt32KeyPhase2>>, |
72 | | MethodSingleNullableColumn< |
73 | | MethodOneNumber<UInt64, AggregatedDataWithNullableUInt64KeyPhase2>>, |
74 | | MethodSingleNullableColumn<MethodOneNumber<UInt128, AggDataNullable<UInt128>>>, |
75 | | MethodSingleNullableColumn<MethodOneNumber<UInt256, AggDataNullable<UInt256>>>, |
76 | | MethodSingleNullableColumn<MethodStringNoCache<NullableStringData>>, |
77 | | MethodKeysFixed<AggData<UInt64>>, MethodKeysFixed<AggData<UInt72>>, |
78 | | MethodKeysFixed<AggData<UInt96>>, MethodKeysFixed<AggData<UInt104>>, |
79 | | MethodKeysFixed<AggData<UInt128>>, MethodKeysFixed<AggData<UInt136>>, |
80 | | MethodKeysFixed<AggData<UInt256>>>; |
81 | | |
82 | | using AggregatedMethodVariants = AggMethodVariantsBase<AggregatedDataWithShortStringKey, |
83 | | AggregatedDataWithNullableShortStringKey>; |
84 | | |
85 | | /// Bucketed agg uses PHHashMap<StringRef> for string keys instead of StringHashMap. |
86 | | /// This avoids StringHashMap's sub-table complexity and unifies the emplace interface |
87 | | /// (3-arg PHHashMap::emplace), while still using HashMethodString for correct |
88 | | /// single-column string key extraction. |
89 | | using BucketedAggMethodVariants = |
90 | | AggMethodVariantsBase<AggregatedDataWithStringKey, AggregatedDataWithNullableStringKey>; |
91 | | |
92 | | /// Intermediate base that adds the shared init logic for aggregation data |
93 | | /// variants. Only the string_key case differs between AggregatedDataVariants |
94 | | /// and BucketedAggDataVariants; all other key types are identical. The |
95 | | /// StringData/NullableStringData template parameters control which hash map |
96 | | /// type is emplaced for string_key. |
97 | | template <typename MethodVariants, typename StringData, typename NullableStringData> |
98 | | struct AggDataVariantsBase : public DataVariants<MethodVariants, MethodSingleNullableColumn, |
99 | | MethodOneNumber, DataWithNullKey> { |
100 | 61 | void init_agg_data(const std::vector<DataTypePtr>& data_types, HashKeyType type) { |
101 | 61 | bool nullable = data_types.size() == 1 && data_types[0]->is_nullable(); |
102 | | |
103 | 61 | switch (type) { |
104 | 1 | case HashKeyType::without_key: |
105 | 1 | break; |
106 | 1 | case HashKeyType::serialized: |
107 | 1 | this->method_variant.template emplace<MethodSerialized<AggregatedDataWithStringKey>>(); |
108 | 1 | break; |
109 | 1 | case HashKeyType::int8_key: |
110 | 1 | this->template emplace_single<UInt8, AggData<UInt8>>(nullable); |
111 | 1 | break; |
112 | 1 | case HashKeyType::int16_key: |
113 | 1 | this->template emplace_single<UInt16, AggData<UInt16>>(nullable); |
114 | 1 | break; |
115 | 2 | case HashKeyType::int32_key: |
116 | 2 | this->template emplace_single<UInt32, AggData<UInt32>>(nullable); |
117 | 2 | break; |
118 | 21 | case HashKeyType::int32_key_phase2: |
119 | 21 | this->template emplace_single<UInt32, AggregatedDataWithUInt32KeyPhase2>(nullable); |
120 | 21 | break; |
121 | 1 | case HashKeyType::int64_key: |
122 | 1 | this->template emplace_single<UInt64, AggData<UInt64>>(nullable); |
123 | 1 | break; |
124 | 24 | case HashKeyType::int64_key_phase2: |
125 | 24 | this->template emplace_single<UInt64, AggregatedDataWithUInt64KeyPhase2>(nullable); |
126 | 24 | break; |
127 | 1 | case HashKeyType::int128_key: |
128 | 1 | this->template emplace_single<UInt128, AggData<UInt128>>(nullable); |
129 | 1 | break; |
130 | 1 | case HashKeyType::int256_key: |
131 | 1 | this->template emplace_single<UInt256, AggData<UInt256>>(nullable); |
132 | 1 | break; |
133 | 2 | case HashKeyType::string_key: |
134 | 2 | if (nullable) { |
135 | 1 | this->method_variant.template emplace< |
136 | 1 | MethodSingleNullableColumn<MethodStringNoCache<NullableStringData>>>(); |
137 | 1 | } else { |
138 | 1 | this->method_variant.template emplace<MethodStringNoCache<StringData>>(); |
139 | 1 | } |
140 | 2 | break; |
141 | 1 | case HashKeyType::fixed64: |
142 | 1 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt64>>>( |
143 | 1 | get_key_sizes(data_types)); |
144 | 1 | break; |
145 | 0 | case HashKeyType::fixed72: |
146 | 0 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt72>>>( |
147 | 0 | get_key_sizes(data_types)); |
148 | 0 | break; |
149 | 0 | case HashKeyType::fixed96: |
150 | 0 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt96>>>( |
151 | 0 | get_key_sizes(data_types)); |
152 | 0 | break; |
153 | 0 | case HashKeyType::fixed104: |
154 | 0 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt104>>>( |
155 | 0 | get_key_sizes(data_types)); |
156 | 0 | break; |
157 | 1 | case HashKeyType::fixed128: |
158 | 1 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt128>>>( |
159 | 1 | get_key_sizes(data_types)); |
160 | 1 | break; |
161 | 1 | case HashKeyType::fixed136: |
162 | 1 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt136>>>( |
163 | 1 | get_key_sizes(data_types)); |
164 | 1 | break; |
165 | 1 | case HashKeyType::fixed256: |
166 | 1 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt256>>>( |
167 | 1 | get_key_sizes(data_types)); |
168 | 1 | break; |
169 | 1 | default: |
170 | 1 | throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid agg key type, type={}", type); |
171 | 61 | } |
172 | 61 | } _ZN5doris19AggDataVariantsBaseISt7variantIJSt9monostateNS_16MethodSerializedI9PHHashMapINS_9StringRefEPc11DefaultHashIS5_vEEEENS_15MethodOneNumberIhS4_IhS6_9HashCRC32IhEEEENSB_ItS4_ItS6_SC_ItEEEENSB_IjS4_IjS6_SC_IjEEEENSB_ImS4_ImS6_SC_ImEEEENS_19MethodStringNoCacheINS_13StringHashMapIS6_NS_9AllocatorILb1ELb1ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEEENSB_IN4wide7integerILm128EjEES4_ISY_S6_SC_ISY_EEEENSB_INSX_ILm256EjEES4_IS12_S6_SC_IS12_EEEENSB_IjS4_IjS6_14HashMixWrapperIjSJ_EEEENSB_ImS4_ImS6_S16_ImSM_EEEENS_26MethodSingleNullableColumnINSB_IhNS_15DataWithNullKeyISE_EEEEEENS1D_INSB_ItNS1E_ISH_EEEEEENS1D_INSB_IjNS1E_ISK_EEEEEENS1D_INSB_ImNS1E_ISN_EEEEEENS1D_INSB_IjNS1E_IS18_EEEEEENS1D_INSB_ImNS1E_IS1B_EEEEEENS1D_INSB_ISY_NS1E_IS10_EEEEEENS1D_INSB_IS12_NS1E_IS14_EEEEEENS1D_INSP_INS1E_ISU_EEEEEENS_15MethodKeysFixedISN_EENS26_IS4_INS_6UInt72ES6_SC_IS28_EEEENS26_IS4_INS_6UInt96ES6_SC_IS2C_EEEENS26_IS4_INS_7UInt104ES6_SC_IS2G_EEEENS26_IS10_EENS26_IS4_INS_7UInt136ES6_SC_IS2L_EEEENS26_IS14_EEEESU_S23_E13init_agg_dataERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS2W_EENS_11HashKeyTypeE Line | Count | Source | 100 | 61 | void init_agg_data(const std::vector<DataTypePtr>& data_types, HashKeyType type) { | 101 | 61 | bool nullable = data_types.size() == 1 && data_types[0]->is_nullable(); | 102 | | | 103 | 61 | switch (type) { | 104 | 1 | case HashKeyType::without_key: | 105 | 1 | break; | 106 | 1 | case HashKeyType::serialized: | 107 | 1 | this->method_variant.template emplace<MethodSerialized<AggregatedDataWithStringKey>>(); | 108 | 1 | break; | 109 | 1 | case HashKeyType::int8_key: | 110 | 1 | this->template emplace_single<UInt8, AggData<UInt8>>(nullable); | 111 | 1 | break; | 112 | 1 | case HashKeyType::int16_key: | 113 | 1 | this->template emplace_single<UInt16, AggData<UInt16>>(nullable); | 114 | 1 | break; | 115 | 2 | case HashKeyType::int32_key: | 116 | 2 | this->template emplace_single<UInt32, AggData<UInt32>>(nullable); | 117 | 2 | break; | 118 | 21 | case HashKeyType::int32_key_phase2: | 119 | 21 | this->template emplace_single<UInt32, AggregatedDataWithUInt32KeyPhase2>(nullable); | 120 | 21 | break; | 121 | 1 | case HashKeyType::int64_key: | 122 | 1 | this->template emplace_single<UInt64, AggData<UInt64>>(nullable); | 123 | 1 | break; | 124 | 24 | case HashKeyType::int64_key_phase2: | 125 | 24 | this->template emplace_single<UInt64, AggregatedDataWithUInt64KeyPhase2>(nullable); | 126 | 24 | break; | 127 | 1 | case HashKeyType::int128_key: | 128 | 1 | this->template emplace_single<UInt128, AggData<UInt128>>(nullable); | 129 | 1 | break; | 130 | 1 | case HashKeyType::int256_key: | 131 | 1 | this->template emplace_single<UInt256, AggData<UInt256>>(nullable); | 132 | 1 | break; | 133 | 2 | case HashKeyType::string_key: | 134 | 2 | if (nullable) { | 135 | 1 | this->method_variant.template emplace< | 136 | 1 | MethodSingleNullableColumn<MethodStringNoCache<NullableStringData>>>(); | 137 | 1 | } else { | 138 | 1 | this->method_variant.template emplace<MethodStringNoCache<StringData>>(); | 139 | 1 | } | 140 | 2 | break; | 141 | 1 | case HashKeyType::fixed64: | 142 | 1 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt64>>>( | 143 | 1 | get_key_sizes(data_types)); | 144 | 1 | break; | 145 | 0 | case HashKeyType::fixed72: | 146 | 0 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt72>>>( | 147 | 0 | get_key_sizes(data_types)); | 148 | 0 | break; | 149 | 0 | case HashKeyType::fixed96: | 150 | 0 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt96>>>( | 151 | 0 | get_key_sizes(data_types)); | 152 | 0 | break; | 153 | 0 | case HashKeyType::fixed104: | 154 | 0 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt104>>>( | 155 | 0 | get_key_sizes(data_types)); | 156 | 0 | break; | 157 | 1 | case HashKeyType::fixed128: | 158 | 1 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt128>>>( | 159 | 1 | get_key_sizes(data_types)); | 160 | 1 | break; | 161 | 1 | case HashKeyType::fixed136: | 162 | 1 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt136>>>( | 163 | 1 | get_key_sizes(data_types)); | 164 | 1 | break; | 165 | 1 | case HashKeyType::fixed256: | 166 | 1 | this->method_variant.template emplace<MethodKeysFixed<AggData<UInt256>>>( | 167 | 1 | get_key_sizes(data_types)); | 168 | 1 | break; | 169 | 1 | default: | 170 | 1 | throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid agg key type, type={}", type); | 171 | 61 | } | 172 | 61 | } |
Unexecuted instantiation: _ZN5doris19AggDataVariantsBaseISt7variantIJSt9monostateNS_16MethodSerializedI9PHHashMapINS_9StringRefEPc11DefaultHashIS5_vEEEENS_15MethodOneNumberIhS4_IhS6_9HashCRC32IhEEEENSB_ItS4_ItS6_SC_ItEEEENSB_IjS4_IjS6_SC_IjEEEENSB_ImS4_ImS6_SC_ImEEEENS_19MethodStringNoCacheIS9_EENSB_IN4wide7integerILm128EjEES4_IST_S6_SC_IST_EEEENSB_INSS_ILm256EjEES4_ISX_S6_SC_ISX_EEEENSB_IjS4_IjS6_14HashMixWrapperIjSJ_EEEENSB_ImS4_ImS6_S11_ImSM_EEEENS_26MethodSingleNullableColumnINSB_IhNS_15DataWithNullKeyISE_EEEEEENS18_INSB_ItNS19_ISH_EEEEEENS18_INSB_IjNS19_ISK_EEEEEENS18_INSB_ImNS19_ISN_EEEEEENS18_INSB_IjNS19_IS13_EEEEEENS18_INSB_ImNS19_IS16_EEEEEENS18_INSB_IST_NS19_ISV_EEEEEENS18_INSB_ISX_NS19_ISZ_EEEEEENS18_INSP_INS19_IS9_EEEEEENS_15MethodKeysFixedISN_EENS21_IS4_INS_6UInt72ES6_SC_IS23_EEEENS21_IS4_INS_6UInt96ES6_SC_IS27_EEEENS21_IS4_INS_7UInt104ES6_SC_IS2B_EEEENS21_ISV_EENS21_IS4_INS_7UInt136ES6_SC_IS2G_EEEENS21_ISZ_EEEES9_S1Y_E13init_agg_dataERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS2R_EENS_11HashKeyTypeE |
173 | | }; |
174 | | |
175 | | struct AggregatedDataVariants |
176 | | : public AggDataVariantsBase<AggregatedMethodVariants, AggregatedDataWithShortStringKey, |
177 | | AggregatedDataWithNullableShortStringKey> { |
178 | | AggregatedDataWithoutKey without_key = nullptr; |
179 | | |
180 | | bool is_fixed_key = true; |
181 | | |
182 | 61 | void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) { |
183 | 61 | is_fixed_key = !(type == HashKeyType::without_key || type == HashKeyType::EMPTY || |
184 | 61 | type == HashKeyType::serialized || type == HashKeyType::string_key); |
185 | 61 | this->init_agg_data(data_types, type); |
186 | 61 | } |
187 | | }; |
188 | | |
189 | | using AggregatedDataVariantsUPtr = std::unique_ptr<AggregatedDataVariants>; |
190 | | using ArenaUPtr = std::unique_ptr<Arena>; |
191 | | |
192 | | /// Data variants for bucketed hash aggregation. |
193 | | /// Uses BucketedAggMethodVariants (PHHashMap for string keys). |
194 | | struct BucketedAggDataVariants |
195 | | : public AggDataVariantsBase<BucketedAggMethodVariants, AggregatedDataWithStringKey, |
196 | | AggregatedDataWithNullableStringKey> { |
197 | 0 | void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) { |
198 | 0 | this->init_agg_data(data_types, type); |
199 | 0 | } |
200 | | }; |
201 | | |
202 | | using BucketedAggDataVariantsUPtr = std::unique_ptr<BucketedAggDataVariants>; |
203 | | |
204 | | struct AggregateDataContainer { |
205 | | public: |
206 | | AggregateDataContainer(size_t size_of_key, size_t size_of_aggregate_states) |
207 | 110 | : _size_of_key(size_of_key), _size_of_aggregate_states(size_of_aggregate_states) {} |
208 | | |
209 | 134 | int64_t memory_usage() const { return _arena_pool.size(); } |
210 | | |
211 | | template <typename KeyType> |
212 | 1.19M | AggregateDataPtr append_data(const KeyType& key) { |
213 | 1.19M | DCHECK_EQ(sizeof(KeyType), _size_of_key); |
214 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero |
215 | 1.19M | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { |
216 | 215 | _expand(); |
217 | 215 | } |
218 | | |
219 | 1.19M | *reinterpret_cast<KeyType*>(_current_keys) = key; |
220 | 1.19M | auto* aggregate_data = _current_agg_data; |
221 | 1.19M | ++_total_count; |
222 | 1.19M | ++_index_in_sub_container; |
223 | 1.19M | _current_agg_data += _size_of_aggregate_states; |
224 | 1.19M | _current_keys += _size_of_key; |
225 | 1.19M | return aggregate_data; |
226 | 1.19M | } _ZN5doris22AggregateDataContainer11append_dataIjEEPcRKT_ Line | Count | Source | 212 | 1.19M | AggregateDataPtr append_data(const KeyType& key) { | 213 | 1.19M | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 214 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 215 | 1.19M | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 216 | 192 | _expand(); | 217 | 192 | } | 218 | | | 219 | 1.19M | *reinterpret_cast<KeyType*>(_current_keys) = key; | 220 | 1.19M | auto* aggregate_data = _current_agg_data; | 221 | 1.19M | ++_total_count; | 222 | 1.19M | ++_index_in_sub_container; | 223 | 1.19M | _current_agg_data += _size_of_aggregate_states; | 224 | 1.19M | _current_keys += _size_of_key; | 225 | 1.19M | return aggregate_data; | 226 | 1.19M | } |
Unexecuted instantiation: _ZN5doris22AggregateDataContainer11append_dataINS_9StringRefEEEPcRKT_ Unexecuted instantiation: _ZN5doris22AggregateDataContainer11append_dataIhEEPcRKT_ Unexecuted instantiation: _ZN5doris22AggregateDataContainer11append_dataItEEPcRKT_ _ZN5doris22AggregateDataContainer11append_dataImEEPcRKT_ Line | Count | Source | 212 | 87 | AggregateDataPtr append_data(const KeyType& key) { | 213 | 87 | DCHECK_EQ(sizeof(KeyType), _size_of_key); | 214 | | // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero | 215 | 87 | if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) { | 216 | 23 | _expand(); | 217 | 23 | } | 218 | | | 219 | 87 | *reinterpret_cast<KeyType*>(_current_keys) = key; | 220 | 87 | auto* aggregate_data = _current_agg_data; | 221 | 87 | ++_total_count; | 222 | 87 | ++_index_in_sub_container; | 223 | 87 | _current_agg_data += _size_of_aggregate_states; | 224 | 87 | _current_keys += _size_of_key; | 225 | 87 | return aggregate_data; | 226 | 87 | } |
Unexecuted instantiation: _ZN5doris22AggregateDataContainer11append_dataIN4wide7integerILm128EjEEEEPcRKT_ Unexecuted instantiation: _ZN5doris22AggregateDataContainer11append_dataIN4wide7integerILm256EjEEEEPcRKT_ Unexecuted instantiation: _ZN5doris22AggregateDataContainer11append_dataINS_6UInt72EEEPcRKT_ Unexecuted instantiation: _ZN5doris22AggregateDataContainer11append_dataINS_6UInt96EEEPcRKT_ Unexecuted instantiation: _ZN5doris22AggregateDataContainer11append_dataINS_7UInt104EEEPcRKT_ Unexecuted instantiation: _ZN5doris22AggregateDataContainer11append_dataINS_7UInt136EEEPcRKT_ |
227 | | |
228 | | template <typename Derived, bool IsConst> |
229 | | class IteratorBase { |
230 | | using Container = |
231 | | std::conditional_t<IsConst, const AggregateDataContainer, AggregateDataContainer>; |
232 | | |
233 | | Container* container = nullptr; |
234 | | uint32_t index; |
235 | | uint32_t sub_container_index; |
236 | | uint32_t index_in_sub_container; |
237 | | |
238 | | friend class HashTable; |
239 | | |
240 | | public: |
241 | 110 | IteratorBase() = default; |
242 | | IteratorBase(Container* container_, uint32_t index_) |
243 | 1.08M | : container(container_), index(index_) { |
244 | 1.08M | sub_container_index = index / SUB_CONTAINER_CAPACITY; |
245 | 1.08M | index_in_sub_container = index - sub_container_index * SUB_CONTAINER_CAPACITY; |
246 | 1.08M | } _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EEC2EPS0_j Line | Count | Source | 243 | 1.08M | : container(container_), index(index_) { | 244 | 1.08M | sub_container_index = index / SUB_CONTAINER_CAPACITY; | 245 | 1.08M | index_in_sub_container = index - sub_container_index * SUB_CONTAINER_CAPACITY; | 246 | 1.08M | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_13ConstIteratorELb1EEC2EPKS0_j Line | Count | Source | 243 | 4 | : container(container_), index(index_) { | 244 | 4 | sub_container_index = index / SUB_CONTAINER_CAPACITY; | 245 | 4 | index_in_sub_container = index - sub_container_index * SUB_CONTAINER_CAPACITY; | 246 | 4 | } |
|
247 | | |
248 | 62 | bool operator==(const IteratorBase& rhs) const { return index == rhs.index; }_ZNK5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EEeqERKS3_ Line | Count | Source | 248 | 60 | bool operator==(const IteratorBase& rhs) const { return index == rhs.index; } |
_ZNK5doris22AggregateDataContainer12IteratorBaseINS0_13ConstIteratorELb1EEeqERKS3_ Line | Count | Source | 248 | 2 | bool operator==(const IteratorBase& rhs) const { return index == rhs.index; } |
|
249 | 1.08M | bool operator!=(const IteratorBase& rhs) const { return index != rhs.index; } |
250 | | |
251 | 1.09M | Derived& operator++() { |
252 | 1.09M | index++; |
253 | 1.09M | index_in_sub_container++; |
254 | 1.09M | if (index_in_sub_container == SUB_CONTAINER_CAPACITY) { |
255 | 132 | index_in_sub_container = 0; |
256 | 132 | sub_container_index++; |
257 | 132 | } |
258 | 1.09M | return static_cast<Derived&>(*this); |
259 | 1.09M | } _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EEppEv Line | Count | Source | 251 | 1.09M | Derived& operator++() { | 252 | 1.09M | index++; | 253 | 1.09M | index_in_sub_container++; | 254 | 1.09M | if (index_in_sub_container == SUB_CONTAINER_CAPACITY) { | 255 | 132 | index_in_sub_container = 0; | 256 | 132 | sub_container_index++; | 257 | 132 | } | 258 | 1.09M | return static_cast<Derived&>(*this); | 259 | 1.09M | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_13ConstIteratorELb1EEppEv Line | Count | Source | 251 | 5 | Derived& operator++() { | 252 | 5 | index++; | 253 | 5 | index_in_sub_container++; | 254 | 5 | if (index_in_sub_container == SUB_CONTAINER_CAPACITY) { | 255 | 0 | index_in_sub_container = 0; | 256 | 0 | sub_container_index++; | 257 | 0 | } | 258 | 5 | return static_cast<Derived&>(*this); | 259 | 5 | } |
|
260 | | |
261 | | template <typename KeyType> |
262 | 1.09M | KeyType get_key() { |
263 | 1.09M | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); |
264 | 1.09M | return ((KeyType*)(container->_key_containers[sub_container_index])) |
265 | 1.09M | [index_in_sub_container]; |
266 | 1.09M | } _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIjEET_v Line | Count | Source | 262 | 1.09M | KeyType get_key() { | 263 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 264 | 1.09M | return ((KeyType*)(container->_key_containers[sub_container_index])) | 265 | 1.09M | [index_in_sub_container]; | 266 | 1.09M | } |
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_13ConstIteratorELb1EE7get_keyIjEET_v Line | Count | Source | 262 | 5 | KeyType get_key() { | 263 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 264 | 5 | return ((KeyType*)(container->_key_containers[sub_container_index])) | 265 | 5 | [index_in_sub_container]; | 266 | 5 | } |
Unexecuted instantiation: _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_9StringRefEEET_v Unexecuted instantiation: _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIhEET_v Unexecuted instantiation: _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyItEET_v _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyImEET_v Line | Count | Source | 262 | 100 | KeyType get_key() { | 263 | | DCHECK_EQ(sizeof(KeyType), container->_size_of_key); | 264 | 100 | return ((KeyType*)(container->_key_containers[sub_container_index])) | 265 | 100 | [index_in_sub_container]; | 266 | 100 | } |
Unexecuted instantiation: _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIN4wide7integerILm128EjEEEET_v Unexecuted instantiation: _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIN4wide7integerILm256EjEEEET_v Unexecuted instantiation: _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_6UInt72EEET_v Unexecuted instantiation: _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_6UInt96EEET_v Unexecuted instantiation: _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_7UInt104EEET_v Unexecuted instantiation: _ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_7UInt136EEET_v |
267 | | |
268 | 1.08M | AggregateDataPtr get_aggregate_data() { |
269 | 1.08M | return &(container->_value_containers[sub_container_index] |
270 | 1.08M | [container->_size_of_aggregate_states * |
271 | 1.08M | index_in_sub_container]); |
272 | 1.08M | } |
273 | | }; |
274 | | |
275 | | class Iterator : public IteratorBase<Iterator, false> { |
276 | | public: |
277 | | using IteratorBase<Iterator, false>::IteratorBase; |
278 | | }; |
279 | | |
280 | | class ConstIterator : public IteratorBase<ConstIterator, true> { |
281 | | public: |
282 | | using IteratorBase<ConstIterator, true>::IteratorBase; |
283 | | }; |
284 | | |
285 | 2 | ConstIterator begin() const { return {this, 0}; } |
286 | | |
287 | 2 | ConstIterator cbegin() const { return begin(); } |
288 | | |
289 | 88 | Iterator begin() { return {this, 0}; } |
290 | | |
291 | 2 | ConstIterator end() const { return {this, _total_count}; } |
292 | 2 | ConstIterator cend() const { return end(); } |
293 | 1.08M | Iterator end() { return {this, _total_count}; } |
294 | | |
295 | 10 | [[nodiscard]] uint32_t total_count() const { return _total_count; } |
296 | | |
297 | 19 | size_t estimate_memory(size_t rows) const { |
298 | 19 | bool need_to_expand = false; |
299 | 19 | if (_total_count == 0) { |
300 | 1 | need_to_expand = true; |
301 | 18 | } else if ((_index_in_sub_container + rows) > SUB_CONTAINER_CAPACITY) { |
302 | 2 | need_to_expand = true; |
303 | 2 | rows -= (SUB_CONTAINER_CAPACITY - _index_in_sub_container); |
304 | 2 | } |
305 | | |
306 | 19 | if (!need_to_expand) { |
307 | 16 | return 0; |
308 | 16 | } |
309 | | |
310 | 3 | size_t count = (rows + SUB_CONTAINER_CAPACITY - 1) / SUB_CONTAINER_CAPACITY; |
311 | 3 | size_t size = _size_of_key * SUB_CONTAINER_CAPACITY; |
312 | 3 | size += _size_of_aggregate_states * SUB_CONTAINER_CAPACITY; |
313 | 3 | size *= count; |
314 | 3 | return size; |
315 | 19 | } |
316 | | |
317 | 113 | void init_once() { |
318 | 113 | if (_inited) { |
319 | 35 | return; |
320 | 35 | } |
321 | 78 | _inited = true; |
322 | 78 | iterator = begin(); |
323 | 78 | } |
324 | | Iterator iterator; |
325 | | |
326 | | private: |
327 | 215 | void _expand() { |
328 | 215 | _index_in_sub_container = 0; |
329 | 215 | _current_keys = nullptr; |
330 | 215 | _current_agg_data = nullptr; |
331 | 215 | try { |
332 | 215 | _current_keys = _arena_pool.alloc(_size_of_key * SUB_CONTAINER_CAPACITY); |
333 | 215 | _key_containers.emplace_back(_current_keys); |
334 | | |
335 | 215 | _current_agg_data = (AggregateDataPtr)_arena_pool.alloc(_size_of_aggregate_states * |
336 | 215 | SUB_CONTAINER_CAPACITY); |
337 | 215 | _value_containers.emplace_back(_current_agg_data); |
338 | 215 | } catch (...) { |
339 | 0 | if (_current_keys) { |
340 | 0 | _key_containers.pop_back(); |
341 | 0 | _current_keys = nullptr; |
342 | 0 | } |
343 | 0 | if (_current_agg_data) { |
344 | 0 | _value_containers.pop_back(); |
345 | 0 | _current_agg_data = nullptr; |
346 | 0 | } |
347 | 0 | throw; |
348 | 0 | } |
349 | 215 | } |
350 | | |
351 | | static constexpr uint32_t SUB_CONTAINER_CAPACITY = 8192; |
352 | | Arena _arena_pool; |
353 | | std::vector<char*> _key_containers; |
354 | | std::vector<AggregateDataPtr> _value_containers; |
355 | | AggregateDataPtr _current_agg_data = nullptr; |
356 | | char* _current_keys = nullptr; |
357 | | size_t _size_of_key {}; |
358 | | size_t _size_of_aggregate_states {}; |
359 | | uint32_t _index_in_sub_container {}; |
360 | | uint32_t _total_count {}; |
361 | | bool _inited = false; |
362 | | }; |
363 | | } // namespace doris |