Coverage Report

Created: 2026-05-19 14:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/agg_utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <variant>
21
#include <vector>
22
23
#include "core/arena.h"
24
#include "exec/common/hash_table/hash_map_context.h"
25
#include "exec/common/hash_table/hash_map_util.h"
26
#include "exec/common/hash_table/ph_hash_map.h"
27
#include "exec/common/hash_table/string_hash_map.h"
28
29
namespace doris {
30
31
template <typename T>
32
using AggData = PHHashMap<T, AggregateDataPtr, HashCRC32<T>>;
33
template <typename T>
34
using AggDataNullable = DataWithNullKey<AggData<T>>;
35
36
using AggregatedDataWithoutKey = AggregateDataPtr;
37
using AggregatedDataWithStringKey = PHHashMap<StringRef, AggregateDataPtr>;
38
using AggregatedDataWithShortStringKey = StringHashMap<AggregateDataPtr>;
39
40
using AggregatedDataWithUInt32KeyPhase2 =
41
        PHHashMap<UInt32, AggregateDataPtr, HashMixWrapper<UInt32>>;
42
using AggregatedDataWithUInt64KeyPhase2 =
43
        PHHashMap<UInt64, AggregateDataPtr, HashMixWrapper<UInt64>>;
44
45
using AggregatedDataWithNullableUInt32KeyPhase2 =
46
        DataWithNullKey<AggregatedDataWithUInt32KeyPhase2>;
47
using AggregatedDataWithNullableUInt64KeyPhase2 =
48
        DataWithNullKey<AggregatedDataWithUInt64KeyPhase2>;
49
using AggregatedDataWithNullableShortStringKey = DataWithNullKey<AggregatedDataWithShortStringKey>;
50
using AggregatedDataWithNullableStringKey = DataWithNullKey<AggregatedDataWithStringKey>;
51
52
/// Parameterized method variant for aggregation hash tables.
53
/// StringData / NullableStringData control which hash map is used for string keys:
54
///   - AggregatedDataVariants uses StringHashMap (AggregatedDataWithShortStringKey)
55
///   - BucketedAggDataVariants uses PHHashMap<StringRef> (AggregatedDataWithStringKey)
56
///     to avoid StringHashMap's sub-table complexity and unify the emplace interface.
57
template <typename StringData, typename NullableStringData>
58
using AggMethodVariantsBase = std::variant<
59
        std::monostate, MethodSerialized<AggregatedDataWithStringKey>,
60
        MethodOneNumber<UInt8, AggData<UInt8>>, MethodOneNumber<UInt16, AggData<UInt16>>,
61
        MethodOneNumber<UInt32, AggData<UInt32>>, MethodOneNumber<UInt64, AggData<UInt64>>,
62
        MethodStringNoCache<StringData>, MethodOneNumber<UInt128, AggData<UInt128>>,
63
        MethodOneNumber<UInt256, AggData<UInt256>>,
64
        MethodOneNumber<UInt32, AggregatedDataWithUInt32KeyPhase2>,
65
        MethodOneNumber<UInt64, AggregatedDataWithUInt64KeyPhase2>,
66
        MethodSingleNullableColumn<MethodOneNumber<UInt8, AggDataNullable<UInt8>>>,
67
        MethodSingleNullableColumn<MethodOneNumber<UInt16, AggDataNullable<UInt16>>>,
68
        MethodSingleNullableColumn<MethodOneNumber<UInt32, AggDataNullable<UInt32>>>,
69
        MethodSingleNullableColumn<MethodOneNumber<UInt64, AggDataNullable<UInt64>>>,
70
        MethodSingleNullableColumn<
71
                MethodOneNumber<UInt32, AggregatedDataWithNullableUInt32KeyPhase2>>,
72
        MethodSingleNullableColumn<
73
                MethodOneNumber<UInt64, AggregatedDataWithNullableUInt64KeyPhase2>>,
74
        MethodSingleNullableColumn<MethodOneNumber<UInt128, AggDataNullable<UInt128>>>,
75
        MethodSingleNullableColumn<MethodOneNumber<UInt256, AggDataNullable<UInt256>>>,
76
        MethodSingleNullableColumn<MethodStringNoCache<NullableStringData>>,
77
        MethodKeysFixed<AggData<UInt64>>, MethodKeysFixed<AggData<UInt72>>,
78
        MethodKeysFixed<AggData<UInt96>>, MethodKeysFixed<AggData<UInt104>>,
79
        MethodKeysFixed<AggData<UInt128>>, MethodKeysFixed<AggData<UInt136>>,
80
        MethodKeysFixed<AggData<UInt256>>>;
81
82
using AggregatedMethodVariants = AggMethodVariantsBase<AggregatedDataWithShortStringKey,
83
                                                       AggregatedDataWithNullableShortStringKey>;
84
85
/// Bucketed agg uses PHHashMap<StringRef> for string keys instead of StringHashMap.
86
/// This avoids StringHashMap's sub-table complexity and unifies the emplace interface
87
/// (3-arg PHHashMap::emplace), while still using HashMethodString for correct
88
/// single-column string key extraction.
89
using BucketedAggMethodVariants =
90
        AggMethodVariantsBase<AggregatedDataWithStringKey, AggregatedDataWithNullableStringKey>;
91
92
/// Intermediate base that adds the shared init logic for aggregation data
93
/// variants.  Only the string_key case differs between AggregatedDataVariants
94
/// and BucketedAggDataVariants; all other key types are identical.  The
95
/// StringData/NullableStringData template parameters control which hash map
96
/// type is emplaced for string_key.
97
template <typename MethodVariants, typename StringData, typename NullableStringData>
98
struct AggDataVariantsBase : public DataVariants<MethodVariants, MethodSingleNullableColumn,
99
                                                 MethodOneNumber, DataWithNullKey> {
100
181k
    void init_agg_data(const std::vector<DataTypePtr>& data_types, HashKeyType type) {
101
181k
        bool nullable = data_types.size() == 1 && data_types[0]->is_nullable();
102
103
181k
        switch (type) {
104
1
        case HashKeyType::without_key:
105
1
            break;
106
23.9k
        case HashKeyType::serialized:
107
23.9k
            this->method_variant.template emplace<MethodSerialized<AggregatedDataWithStringKey>>();
108
23.9k
            break;
109
6.08k
        case HashKeyType::int8_key:
110
6.08k
            this->template emplace_single<UInt8, AggData<UInt8>>(nullable);
111
6.08k
            break;
112
9.97k
        case HashKeyType::int16_key:
113
9.97k
            this->template emplace_single<UInt16, AggData<UInt16>>(nullable);
114
9.97k
            break;
115
11.5k
        case HashKeyType::int32_key:
116
11.5k
            this->template emplace_single<UInt32, AggData<UInt32>>(nullable);
117
11.5k
            break;
118
18.4k
        case HashKeyType::int32_key_phase2:
119
18.4k
            this->template emplace_single<UInt32, AggregatedDataWithUInt32KeyPhase2>(nullable);
120
18.4k
            break;
121
18.0k
        case HashKeyType::int64_key:
122
18.0k
            this->template emplace_single<UInt64, AggData<UInt64>>(nullable);
123
18.0k
            break;
124
9.68k
        case HashKeyType::int64_key_phase2:
125
9.68k
            this->template emplace_single<UInt64, AggregatedDataWithUInt64KeyPhase2>(nullable);
126
9.68k
            break;
127
1.19k
        case HashKeyType::int128_key:
128
1.19k
            this->template emplace_single<UInt128, AggData<UInt128>>(nullable);
129
1.19k
            break;
130
39
        case HashKeyType::int256_key:
131
39
            this->template emplace_single<UInt256, AggData<UInt256>>(nullable);
132
39
            break;
133
36.4k
        case HashKeyType::string_key:
134
36.4k
            if (nullable) {
135
7.28k
                this->method_variant.template emplace<
136
7.28k
                        MethodSingleNullableColumn<MethodStringNoCache<NullableStringData>>>();
137
29.2k
            } else {
138
29.2k
                this->method_variant.template emplace<MethodStringNoCache<StringData>>();
139
29.2k
            }
140
36.4k
            break;
141
1.13k
        case HashKeyType::fixed64:
142
1.13k
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt64>>>(
143
1.13k
                    get_key_sizes(data_types));
144
1.13k
            break;
145
1.12k
        case HashKeyType::fixed72:
146
1.12k
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt72>>>(
147
1.12k
                    get_key_sizes(data_types));
148
1.12k
            break;
149
765
        case HashKeyType::fixed96:
150
765
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt96>>>(
151
765
                    get_key_sizes(data_types));
152
765
            break;
153
692
        case HashKeyType::fixed104:
154
692
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt104>>>(
155
692
                    get_key_sizes(data_types));
156
692
            break;
157
179
        case HashKeyType::fixed128:
158
179
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt128>>>(
159
179
                    get_key_sizes(data_types));
160
179
            break;
161
38.1k
        case HashKeyType::fixed136:
162
38.1k
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt136>>>(
163
38.1k
                    get_key_sizes(data_types));
164
38.1k
            break;
165
3.61k
        case HashKeyType::fixed256:
166
3.61k
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt256>>>(
167
3.61k
                    get_key_sizes(data_types));
168
3.61k
            break;
169
1
        default:
170
1
            throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid agg key type, type={}", type);
171
181k
        }
172
181k
    }
_ZN5doris19AggDataVariantsBaseISt7variantIJSt9monostateNS_16MethodSerializedI9PHHashMapINS_9StringRefEPc11DefaultHashIS5_vEEEENS_15MethodOneNumberIhS4_IhS6_9HashCRC32IhEEEENSB_ItS4_ItS6_SC_ItEEEENSB_IjS4_IjS6_SC_IjEEEENSB_ImS4_ImS6_SC_ImEEEENS_19MethodStringNoCacheINS_13StringHashMapIS6_NS_9AllocatorILb1ELb1ELb0ENS_22DefaultMemoryAllocatorELb1EEEEEEENSB_IN4wide7integerILm128EjEES4_ISY_S6_SC_ISY_EEEENSB_INSX_ILm256EjEES4_IS12_S6_SC_IS12_EEEENSB_IjS4_IjS6_14HashMixWrapperIjSJ_EEEENSB_ImS4_ImS6_S16_ImSM_EEEENS_26MethodSingleNullableColumnINSB_IhNS_15DataWithNullKeyISE_EEEEEENS1D_INSB_ItNS1E_ISH_EEEEEENS1D_INSB_IjNS1E_ISK_EEEEEENS1D_INSB_ImNS1E_ISN_EEEEEENS1D_INSB_IjNS1E_IS18_EEEEEENS1D_INSB_ImNS1E_IS1B_EEEEEENS1D_INSB_ISY_NS1E_IS10_EEEEEENS1D_INSB_IS12_NS1E_IS14_EEEEEENS1D_INSP_INS1E_ISU_EEEEEENS_15MethodKeysFixedISN_EENS26_IS4_INS_6UInt72ES6_SC_IS28_EEEENS26_IS4_INS_6UInt96ES6_SC_IS2C_EEEENS26_IS4_INS_7UInt104ES6_SC_IS2G_EEEENS26_IS10_EENS26_IS4_INS_7UInt136ES6_SC_IS2L_EEEENS26_IS14_EEEESU_S23_E13init_agg_dataERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS2W_EENS_11HashKeyTypeE
Line
Count
Source
100
73.7k
    void init_agg_data(const std::vector<DataTypePtr>& data_types, HashKeyType type) {
101
73.7k
        bool nullable = data_types.size() == 1 && data_types[0]->is_nullable();
102
103
73.7k
        switch (type) {
104
1
        case HashKeyType::without_key:
105
1
            break;
106
8.08k
        case HashKeyType::serialized:
107
8.08k
            this->method_variant.template emplace<MethodSerialized<AggregatedDataWithStringKey>>();
108
8.08k
            break;
109
5.57k
        case HashKeyType::int8_key:
110
5.57k
            this->template emplace_single<UInt8, AggData<UInt8>>(nullable);
111
5.57k
            break;
112
1.00k
        case HashKeyType::int16_key:
113
1.00k
            this->template emplace_single<UInt16, AggData<UInt16>>(nullable);
114
1.00k
            break;
115
7.68k
        case HashKeyType::int32_key:
116
7.68k
            this->template emplace_single<UInt32, AggData<UInt32>>(nullable);
117
7.68k
            break;
118
18.4k
        case HashKeyType::int32_key_phase2:
119
18.4k
            this->template emplace_single<UInt32, AggregatedDataWithUInt32KeyPhase2>(nullable);
120
18.4k
            break;
121
6.81k
        case HashKeyType::int64_key:
122
6.81k
            this->template emplace_single<UInt64, AggData<UInt64>>(nullable);
123
6.81k
            break;
124
9.68k
        case HashKeyType::int64_key_phase2:
125
9.68k
            this->template emplace_single<UInt64, AggregatedDataWithUInt64KeyPhase2>(nullable);
126
9.68k
            break;
127
1.19k
        case HashKeyType::int128_key:
128
1.19k
            this->template emplace_single<UInt128, AggData<UInt128>>(nullable);
129
1.19k
            break;
130
39
        case HashKeyType::int256_key:
131
39
            this->template emplace_single<UInt256, AggData<UInt256>>(nullable);
132
39
            break;
133
5.77k
        case HashKeyType::string_key:
134
5.77k
            if (nullable) {
135
5.29k
                this->method_variant.template emplace<
136
5.29k
                        MethodSingleNullableColumn<MethodStringNoCache<NullableStringData>>>();
137
5.29k
            } else {
138
471
                this->method_variant.template emplace<MethodStringNoCache<StringData>>();
139
471
            }
140
5.77k
            break;
141
1.13k
        case HashKeyType::fixed64:
142
1.13k
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt64>>>(
143
1.13k
                    get_key_sizes(data_types));
144
1.13k
            break;
145
1.12k
        case HashKeyType::fixed72:
146
1.12k
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt72>>>(
147
1.12k
                    get_key_sizes(data_types));
148
1.12k
            break;
149
765
        case HashKeyType::fixed96:
150
765
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt96>>>(
151
765
                    get_key_sizes(data_types));
152
765
            break;
153
692
        case HashKeyType::fixed104:
154
692
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt104>>>(
155
692
                    get_key_sizes(data_types));
156
692
            break;
157
179
        case HashKeyType::fixed128:
158
179
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt128>>>(
159
179
                    get_key_sizes(data_types));
160
179
            break;
161
2.25k
        case HashKeyType::fixed136:
162
2.25k
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt136>>>(
163
2.25k
                    get_key_sizes(data_types));
164
2.25k
            break;
165
3.35k
        case HashKeyType::fixed256:
166
3.35k
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt256>>>(
167
3.35k
                    get_key_sizes(data_types));
168
3.35k
            break;
169
1
        default:
170
1
            throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid agg key type, type={}", type);
171
73.7k
        }
172
73.7k
    }
_ZN5doris19AggDataVariantsBaseISt7variantIJSt9monostateNS_16MethodSerializedI9PHHashMapINS_9StringRefEPc11DefaultHashIS5_vEEEENS_15MethodOneNumberIhS4_IhS6_9HashCRC32IhEEEENSB_ItS4_ItS6_SC_ItEEEENSB_IjS4_IjS6_SC_IjEEEENSB_ImS4_ImS6_SC_ImEEEENS_19MethodStringNoCacheIS9_EENSB_IN4wide7integerILm128EjEES4_IST_S6_SC_IST_EEEENSB_INSS_ILm256EjEES4_ISX_S6_SC_ISX_EEEENSB_IjS4_IjS6_14HashMixWrapperIjSJ_EEEENSB_ImS4_ImS6_S11_ImSM_EEEENS_26MethodSingleNullableColumnINSB_IhNS_15DataWithNullKeyISE_EEEEEENS18_INSB_ItNS19_ISH_EEEEEENS18_INSB_IjNS19_ISK_EEEEEENS18_INSB_ImNS19_ISN_EEEEEENS18_INSB_IjNS19_IS13_EEEEEENS18_INSB_ImNS19_IS16_EEEEEENS18_INSB_IST_NS19_ISV_EEEEEENS18_INSB_ISX_NS19_ISZ_EEEEEENS18_INSP_INS19_IS9_EEEEEENS_15MethodKeysFixedISN_EENS21_IS4_INS_6UInt72ES6_SC_IS23_EEEENS21_IS4_INS_6UInt96ES6_SC_IS27_EEEENS21_IS4_INS_7UInt104ES6_SC_IS2B_EEEENS21_ISV_EENS21_IS4_INS_7UInt136ES6_SC_IS2G_EEEENS21_ISZ_EEEES9_S1Y_E13init_agg_dataERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS2R_EENS_11HashKeyTypeE
Line
Count
Source
100
107k
    void init_agg_data(const std::vector<DataTypePtr>& data_types, HashKeyType type) {
101
107k
        bool nullable = data_types.size() == 1 && data_types[0]->is_nullable();
102
103
107k
        switch (type) {
104
0
        case HashKeyType::without_key:
105
0
            break;
106
15.8k
        case HashKeyType::serialized:
107
15.8k
            this->method_variant.template emplace<MethodSerialized<AggregatedDataWithStringKey>>();
108
15.8k
            break;
109
512
        case HashKeyType::int8_key:
110
512
            this->template emplace_single<UInt8, AggData<UInt8>>(nullable);
111
512
            break;
112
8.97k
        case HashKeyType::int16_key:
113
8.97k
            this->template emplace_single<UInt16, AggData<UInt16>>(nullable);
114
8.97k
            break;
115
3.85k
        case HashKeyType::int32_key:
116
3.85k
            this->template emplace_single<UInt32, AggData<UInt32>>(nullable);
117
3.85k
            break;
118
0
        case HashKeyType::int32_key_phase2:
119
0
            this->template emplace_single<UInt32, AggregatedDataWithUInt32KeyPhase2>(nullable);
120
0
            break;
121
11.2k
        case HashKeyType::int64_key:
122
11.2k
            this->template emplace_single<UInt64, AggData<UInt64>>(nullable);
123
11.2k
            break;
124
0
        case HashKeyType::int64_key_phase2:
125
0
            this->template emplace_single<UInt64, AggregatedDataWithUInt64KeyPhase2>(nullable);
126
0
            break;
127
0
        case HashKeyType::int128_key:
128
0
            this->template emplace_single<UInt128, AggData<UInt128>>(nullable);
129
0
            break;
130
0
        case HashKeyType::int256_key:
131
0
            this->template emplace_single<UInt256, AggData<UInt256>>(nullable);
132
0
            break;
133
30.7k
        case HashKeyType::string_key:
134
30.7k
            if (nullable) {
135
1.98k
                this->method_variant.template emplace<
136
1.98k
                        MethodSingleNullableColumn<MethodStringNoCache<NullableStringData>>>();
137
28.7k
            } else {
138
28.7k
                this->method_variant.template emplace<MethodStringNoCache<StringData>>();
139
28.7k
            }
140
30.7k
            break;
141
0
        case HashKeyType::fixed64:
142
0
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt64>>>(
143
0
                    get_key_sizes(data_types));
144
0
            break;
145
0
        case HashKeyType::fixed72:
146
0
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt72>>>(
147
0
                    get_key_sizes(data_types));
148
0
            break;
149
0
        case HashKeyType::fixed96:
150
0
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt96>>>(
151
0
                    get_key_sizes(data_types));
152
0
            break;
153
0
        case HashKeyType::fixed104:
154
0
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt104>>>(
155
0
                    get_key_sizes(data_types));
156
0
            break;
157
0
        case HashKeyType::fixed128:
158
0
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt128>>>(
159
0
                    get_key_sizes(data_types));
160
0
            break;
161
35.9k
        case HashKeyType::fixed136:
162
35.9k
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt136>>>(
163
35.9k
                    get_key_sizes(data_types));
164
35.9k
            break;
165
256
        case HashKeyType::fixed256:
166
256
            this->method_variant.template emplace<MethodKeysFixed<AggData<UInt256>>>(
167
256
                    get_key_sizes(data_types));
168
256
            break;
169
0
        default:
170
0
            throw Exception(ErrorCode::INTERNAL_ERROR, "meet invalid agg key type, type={}", type);
171
107k
        }
172
107k
    }
173
};
174
175
struct AggregatedDataVariants
176
        : public AggDataVariantsBase<AggregatedMethodVariants, AggregatedDataWithShortStringKey,
177
                                     AggregatedDataWithNullableShortStringKey> {
178
    AggregatedDataWithoutKey without_key = nullptr;
179
180
    bool is_fixed_key = true;
181
182
73.8k
    void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) {
183
73.8k
        is_fixed_key = !(type == HashKeyType::without_key || type == HashKeyType::EMPTY ||
184
73.9k
                         type == HashKeyType::serialized || type == HashKeyType::string_key);
185
73.8k
        this->init_agg_data(data_types, type);
186
73.8k
    }
187
};
188
189
using AggregatedDataVariantsUPtr = std::unique_ptr<AggregatedDataVariants>;
190
using ArenaUPtr = std::unique_ptr<Arena>;
191
192
/// Data variants for bucketed hash aggregation.
193
/// Uses BucketedAggMethodVariants (PHHashMap for string keys).
194
struct BucketedAggDataVariants
195
        : public AggDataVariantsBase<BucketedAggMethodVariants, AggregatedDataWithStringKey,
196
                                     AggregatedDataWithNullableStringKey> {
197
108k
    void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) {
198
108k
        this->init_agg_data(data_types, type);
199
108k
    }
200
};
201
202
using BucketedAggDataVariantsUPtr = std::unique_ptr<BucketedAggDataVariants>;
203
204
struct AggregateDataContainer {
205
public:
206
    AggregateDataContainer(size_t size_of_key, size_t size_of_aggregate_states)
207
71.1k
            : _size_of_key(size_of_key), _size_of_aggregate_states(size_of_aggregate_states) {}
208
209
88.8k
    int64_t memory_usage() const { return _arena_pool.size(); }
210
211
    template <typename KeyType>
212
6.87M
    AggregateDataPtr append_data(const KeyType& key) {
213
6.87M
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
6.87M
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
41.8k
            _expand();
217
41.8k
        }
218
219
6.87M
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
6.87M
        auto* aggregate_data = _current_agg_data;
221
6.87M
        ++_total_count;
222
6.87M
        ++_index_in_sub_container;
223
6.87M
        _current_agg_data += _size_of_aggregate_states;
224
6.87M
        _current_keys += _size_of_key;
225
6.87M
        return aggregate_data;
226
6.87M
    }
_ZN5doris22AggregateDataContainer11append_dataINS_9StringRefEEEPcRKT_
Line
Count
Source
212
178k
    AggregateDataPtr append_data(const KeyType& key) {
213
178k
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
178k
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
5.04k
            _expand();
217
5.04k
        }
218
219
178k
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
178k
        auto* aggregate_data = _current_agg_data;
221
178k
        ++_total_count;
222
178k
        ++_index_in_sub_container;
223
178k
        _current_agg_data += _size_of_aggregate_states;
224
178k
        _current_keys += _size_of_key;
225
178k
        return aggregate_data;
226
178k
    }
_ZN5doris22AggregateDataContainer11append_dataIhEEPcRKT_
Line
Count
Source
212
4.89k
    AggregateDataPtr append_data(const KeyType& key) {
213
4.89k
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
4.89k
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
2.74k
            _expand();
217
2.74k
        }
218
219
4.89k
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
4.89k
        auto* aggregate_data = _current_agg_data;
221
4.89k
        ++_total_count;
222
4.89k
        ++_index_in_sub_container;
223
4.89k
        _current_agg_data += _size_of_aggregate_states;
224
4.89k
        _current_keys += _size_of_key;
225
4.89k
        return aggregate_data;
226
4.89k
    }
_ZN5doris22AggregateDataContainer11append_dataItEEPcRKT_
Line
Count
Source
212
2.56k
    AggregateDataPtr append_data(const KeyType& key) {
213
2.56k
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
2.56k
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
491
            _expand();
217
491
        }
218
219
2.56k
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
2.56k
        auto* aggregate_data = _current_agg_data;
221
2.56k
        ++_total_count;
222
2.56k
        ++_index_in_sub_container;
223
2.56k
        _current_agg_data += _size_of_aggregate_states;
224
2.56k
        _current_keys += _size_of_key;
225
2.56k
        return aggregate_data;
226
2.56k
    }
_ZN5doris22AggregateDataContainer11append_dataIjEEPcRKT_
Line
Count
Source
212
2.79M
    AggregateDataPtr append_data(const KeyType& key) {
213
2.79M
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
2.79M
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
17.4k
            _expand();
217
17.4k
        }
218
219
2.79M
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
2.79M
        auto* aggregate_data = _current_agg_data;
221
2.79M
        ++_total_count;
222
2.79M
        ++_index_in_sub_container;
223
2.79M
        _current_agg_data += _size_of_aggregate_states;
224
2.79M
        _current_keys += _size_of_key;
225
2.79M
        return aggregate_data;
226
2.79M
    }
_ZN5doris22AggregateDataContainer11append_dataImEEPcRKT_
Line
Count
Source
212
3.83M
    AggregateDataPtr append_data(const KeyType& key) {
213
3.83M
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
3.83M
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
12.1k
            _expand();
217
12.1k
        }
218
219
3.83M
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
3.83M
        auto* aggregate_data = _current_agg_data;
221
3.83M
        ++_total_count;
222
3.83M
        ++_index_in_sub_container;
223
3.83M
        _current_agg_data += _size_of_aggregate_states;
224
3.83M
        _current_keys += _size_of_key;
225
3.83M
        return aggregate_data;
226
3.83M
    }
_ZN5doris22AggregateDataContainer11append_dataIN4wide7integerILm128EjEEEEPcRKT_
Line
Count
Source
212
24.2k
    AggregateDataPtr append_data(const KeyType& key) {
213
24.2k
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
24.2k
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
960
            _expand();
217
960
        }
218
219
24.2k
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
24.2k
        auto* aggregate_data = _current_agg_data;
221
24.2k
        ++_total_count;
222
24.2k
        ++_index_in_sub_container;
223
24.2k
        _current_agg_data += _size_of_aggregate_states;
224
24.2k
        _current_keys += _size_of_key;
225
24.2k
        return aggregate_data;
226
24.2k
    }
_ZN5doris22AggregateDataContainer11append_dataIN4wide7integerILm256EjEEEEPcRKT_
Line
Count
Source
212
22.9k
    AggregateDataPtr append_data(const KeyType& key) {
213
22.9k
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
22.9k
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
713
            _expand();
217
713
        }
218
219
22.9k
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
22.9k
        auto* aggregate_data = _current_agg_data;
221
22.9k
        ++_total_count;
222
22.9k
        ++_index_in_sub_container;
223
22.9k
        _current_agg_data += _size_of_aggregate_states;
224
22.9k
        _current_keys += _size_of_key;
225
22.9k
        return aggregate_data;
226
22.9k
    }
_ZN5doris22AggregateDataContainer11append_dataINS_6UInt72EEEPcRKT_
Line
Count
Source
212
1.04k
    AggregateDataPtr append_data(const KeyType& key) {
213
1.04k
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
1.04k
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
426
            _expand();
217
426
        }
218
219
1.04k
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
1.04k
        auto* aggregate_data = _current_agg_data;
221
1.04k
        ++_total_count;
222
1.04k
        ++_index_in_sub_container;
223
1.04k
        _current_agg_data += _size_of_aggregate_states;
224
1.04k
        _current_keys += _size_of_key;
225
1.04k
        return aggregate_data;
226
1.04k
    }
_ZN5doris22AggregateDataContainer11append_dataINS_6UInt96EEEPcRKT_
Line
Count
Source
212
7.64k
    AggregateDataPtr append_data(const KeyType& key) {
213
7.64k
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
7.64k
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
559
            _expand();
217
559
        }
218
219
7.64k
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
7.64k
        auto* aggregate_data = _current_agg_data;
221
7.64k
        ++_total_count;
222
7.64k
        ++_index_in_sub_container;
223
7.64k
        _current_agg_data += _size_of_aggregate_states;
224
7.64k
        _current_keys += _size_of_key;
225
7.64k
        return aggregate_data;
226
7.64k
    }
_ZN5doris22AggregateDataContainer11append_dataINS_7UInt104EEEPcRKT_
Line
Count
Source
212
629
    AggregateDataPtr append_data(const KeyType& key) {
213
629
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
629
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
304
            _expand();
217
304
        }
218
219
629
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
629
        auto* aggregate_data = _current_agg_data;
221
629
        ++_total_count;
222
629
        ++_index_in_sub_container;
223
629
        _current_agg_data += _size_of_aggregate_states;
224
629
        _current_keys += _size_of_key;
225
629
        return aggregate_data;
226
629
    }
_ZN5doris22AggregateDataContainer11append_dataINS_7UInt136EEEPcRKT_
Line
Count
Source
212
1.80k
    AggregateDataPtr append_data(const KeyType& key) {
213
1.80k
        DCHECK_EQ(sizeof(KeyType), _size_of_key);
214
        // SUB_CONTAINER_CAPACITY should add a new sub container, and also expand when it is zero
215
1.80k
        if (UNLIKELY(_index_in_sub_container % SUB_CONTAINER_CAPACITY == 0)) {
216
975
            _expand();
217
975
        }
218
219
1.80k
        *reinterpret_cast<KeyType*>(_current_keys) = key;
220
1.80k
        auto* aggregate_data = _current_agg_data;
221
1.80k
        ++_total_count;
222
1.80k
        ++_index_in_sub_container;
223
1.80k
        _current_agg_data += _size_of_aggregate_states;
224
1.80k
        _current_keys += _size_of_key;
225
1.80k
        return aggregate_data;
226
1.80k
    }
227
228
    template <typename Derived, bool IsConst>
229
    class IteratorBase {
230
        using Container =
231
                std::conditional_t<IsConst, const AggregateDataContainer, AggregateDataContainer>;
232
233
        Container* container = nullptr;
234
        uint32_t index;
235
        uint32_t sub_container_index;
236
        uint32_t index_in_sub_container;
237
238
        friend class HashTable;
239
240
    public:
241
71.1k
        IteratorBase() = default;
242
        IteratorBase(Container* container_, uint32_t index_)
243
6.97M
                : container(container_), index(index_) {
244
6.97M
            sub_container_index = index / SUB_CONTAINER_CAPACITY;
245
6.97M
            index_in_sub_container = index - sub_container_index * SUB_CONTAINER_CAPACITY;
246
6.97M
        }
247
248
67.9k
        bool operator==(const IteratorBase& rhs) const { return index == rhs.index; }
249
6.84M
        bool operator!=(const IteratorBase& rhs) const { return index != rhs.index; }
250
251
6.76M
        Derived& operator++() {
252
6.76M
            index++;
253
6.76M
            index_in_sub_container++;
254
6.76M
            if (index_in_sub_container == SUB_CONTAINER_CAPACITY) {
255
630
                index_in_sub_container = 0;
256
630
                sub_container_index++;
257
630
            }
258
6.76M
            return static_cast<Derived&>(*this);
259
6.76M
        }
260
261
        template <typename KeyType>
262
6.75M
        KeyType get_key() {
263
6.75M
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
6.75M
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
6.75M
                    [index_in_sub_container];
266
6.75M
        }
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_9StringRefEEET_v
Line
Count
Source
262
266k
        KeyType get_key() {
263
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
266k
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
266k
                    [index_in_sub_container];
266
266k
        }
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIhEET_v
Line
Count
Source
262
4.98k
        KeyType get_key() {
263
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
4.98k
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
4.98k
                    [index_in_sub_container];
266
4.98k
        }
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyItEET_v
Line
Count
Source
262
2.56k
        KeyType get_key() {
263
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
2.56k
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
2.56k
                    [index_in_sub_container];
266
2.56k
        }
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIjEET_v
Line
Count
Source
262
2.57M
        KeyType get_key() {
263
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
2.57M
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
2.57M
                    [index_in_sub_container];
266
2.57M
        }
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyImEET_v
Line
Count
Source
262
3.83M
        KeyType get_key() {
263
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
3.83M
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
3.83M
                    [index_in_sub_container];
266
3.83M
        }
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIN4wide7integerILm128EjEEEET_v
Line
Count
Source
262
24.4k
        KeyType get_key() {
263
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
24.4k
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
24.4k
                    [index_in_sub_container];
266
24.4k
        }
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyIN4wide7integerILm256EjEEEET_v
Line
Count
Source
262
43.0k
        KeyType get_key() {
263
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
43.0k
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
43.0k
                    [index_in_sub_container];
266
43.0k
        }
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_6UInt72EEET_v
Line
Count
Source
262
1.05k
        KeyType get_key() {
263
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
1.05k
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
1.05k
                    [index_in_sub_container];
266
1.05k
        }
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_6UInt96EEET_v
Line
Count
Source
262
7.69k
        KeyType get_key() {
263
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
7.69k
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
7.69k
                    [index_in_sub_container];
266
7.69k
        }
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_7UInt104EEET_v
Line
Count
Source
262
629
        KeyType get_key() {
263
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
629
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
629
                    [index_in_sub_container];
266
629
        }
_ZN5doris22AggregateDataContainer12IteratorBaseINS0_8IteratorELb0EE7get_keyINS_7UInt136EEET_v
Line
Count
Source
262
1.80k
        KeyType get_key() {
263
            DCHECK_EQ(sizeof(KeyType), container->_size_of_key);
264
1.80k
            return ((KeyType*)(container->_key_containers[sub_container_index]))
265
1.80k
                    [index_in_sub_container];
266
1.80k
        }
267
268
6.61M
        AggregateDataPtr get_aggregate_data() {
269
6.61M
            return &(container->_value_containers[sub_container_index]
270
6.61M
                                                 [container->_size_of_aggregate_states *
271
6.61M
                                                  index_in_sub_container]);
272
6.61M
        }
273
    };
274
275
    class Iterator : public IteratorBase<Iterator, false> {
276
    public:
277
        using IteratorBase<Iterator, false>::IteratorBase;
278
    };
279
280
    class ConstIterator : public IteratorBase<ConstIterator, true> {
281
    public:
282
        using IteratorBase<ConstIterator, true>::IteratorBase;
283
    };
284
285
    ConstIterator begin() const { return {this, 0}; }
286
287
    ConstIterator cbegin() const { return begin(); }
288
289
67.6k
    Iterator begin() { return {this, 0}; }
290
291
    ConstIterator end() const { return {this, _total_count}; }
292
    ConstIterator cend() const { return end(); }
293
6.90M
    Iterator end() { return {this, _total_count}; }
294
295
10
    [[nodiscard]] uint32_t total_count() const { return _total_count; }
296
297
19
    size_t estimate_memory(size_t rows) const {
298
19
        bool need_to_expand = false;
299
19
        if (_total_count == 0) {
300
1
            need_to_expand = true;
301
18
        } else if ((_index_in_sub_container + rows) > SUB_CONTAINER_CAPACITY) {
302
2
            need_to_expand = true;
303
2
            rows -= (SUB_CONTAINER_CAPACITY - _index_in_sub_container);
304
2
        }
305
306
19
        if (!need_to_expand) {
307
16
            return 0;
308
16
        }
309
310
3
        size_t count = (rows + SUB_CONTAINER_CAPACITY - 1) / SUB_CONTAINER_CAPACITY;
311
3
        size_t size = _size_of_key * SUB_CONTAINER_CAPACITY;
312
3
        size += _size_of_aggregate_states * SUB_CONTAINER_CAPACITY;
313
3
        size *= count;
314
3
        return size;
315
19
    }
316
317
68.0k
    void init_once() {
318
68.0k
        if (_inited) {
319
735
            return;
320
735
        }
321
67.3k
        _inited = true;
322
67.3k
        iterator = begin();
323
67.3k
    }
324
    Iterator iterator;
325
326
private:
327
41.8k
    void _expand() {
328
41.8k
        _index_in_sub_container = 0;
329
41.8k
        _current_keys = nullptr;
330
41.8k
        _current_agg_data = nullptr;
331
41.8k
        try {
332
41.8k
            _current_keys = _arena_pool.alloc(_size_of_key * SUB_CONTAINER_CAPACITY);
333
41.8k
            _key_containers.emplace_back(_current_keys);
334
335
41.8k
            _current_agg_data = (AggregateDataPtr)_arena_pool.alloc(_size_of_aggregate_states *
336
41.8k
                                                                    SUB_CONTAINER_CAPACITY);
337
41.8k
            _value_containers.emplace_back(_current_agg_data);
338
41.8k
        } catch (...) {
339
0
            if (_current_keys) {
340
0
                _key_containers.pop_back();
341
0
                _current_keys = nullptr;
342
0
            }
343
0
            if (_current_agg_data) {
344
0
                _value_containers.pop_back();
345
0
                _current_agg_data = nullptr;
346
0
            }
347
0
            throw;
348
0
        }
349
41.8k
    }
350
351
    static constexpr uint32_t SUB_CONTAINER_CAPACITY = 8192;
352
    Arena _arena_pool;
353
    std::vector<char*> _key_containers;
354
    std::vector<AggregateDataPtr> _value_containers;
355
    AggregateDataPtr _current_agg_data = nullptr;
356
    char* _current_keys = nullptr;
357
    size_t _size_of_key {};
358
    size_t _size_of_aggregate_states {};
359
    uint32_t _index_in_sub_container {};
360
    uint32_t _total_count {};
361
    bool _inited = false;
362
};
363
} // namespace doris