be/src/exec/common/distinct_agg_utils.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <variant> |
21 | | #include <vector> |
22 | | |
23 | | #include "core/arena.h" |
24 | | #include "core/types.h" |
25 | | #include "exec/common/hash_table/hash_map_context.h" |
26 | | #include "exec/common/hash_table/hash_map_util.h" |
27 | | #include "exec/common/hash_table/ph_hash_map.h" |
28 | | #include "exec/common/hash_table/ph_hash_set.h" |
29 | | #include "exec/common/hash_table/string_hash_map.h" |
30 | | |
31 | | namespace doris { |
32 | | |
33 | | template <typename T> |
34 | | struct DistinctHashSetType { |
35 | | using HashSet = PHHashSet<T, HashCRC32<T>>; |
36 | | }; |
37 | | |
38 | | template <> |
39 | | struct DistinctHashSetType<UInt8> { |
40 | | using HashSet = SmallFixedSizeHashSet<UInt8>; |
41 | | }; |
42 | | |
43 | | template <> |
44 | | struct DistinctHashSetType<Int8> { |
45 | | using HashSet = SmallFixedSizeHashSet<Int8>; |
46 | | }; |
47 | | |
48 | | template <typename T> |
49 | | struct DistinctPhase2HashSetType { |
50 | | using HashSet = PHHashSet<T, HashMixWrapper<T>>; |
51 | | }; |
52 | | |
53 | | template <> |
54 | | struct DistinctPhase2HashSetType<UInt8> { |
55 | | using HashSet = SmallFixedSizeHashSet<UInt8>; |
56 | | }; |
57 | | |
58 | | template <> |
59 | | struct DistinctPhase2HashSetType<Int8> { |
60 | | using HashSet = SmallFixedSizeHashSet<Int8>; |
61 | | }; |
62 | | |
63 | | template <typename T> |
64 | | using DistinctData = typename DistinctHashSetType<T>::HashSet; |
65 | | |
66 | | template <typename T> |
67 | | using DistinctDataPhase2 = typename DistinctPhase2HashSetType<T>::HashSet; |
68 | | |
69 | | using DistinctDataWithStringKey = PHHashSet<StringRef>; |
70 | | |
71 | | // todo: Need to implement StringHashSet like StringHashMap |
72 | | using DistinctDataWithShortStringKey = PHHashSet<StringRef>; |
73 | | |
74 | | using DistinctMethodVariants = std::variant< |
75 | | std::monostate, MethodSerialized<DistinctDataWithStringKey>, |
76 | | MethodOneNumber<UInt8, DistinctData<UInt8>>, MethodOneNumber<UInt16, DistinctData<UInt16>>, |
77 | | MethodOneNumber<UInt32, DistinctData<UInt32>>, |
78 | | MethodOneNumber<UInt64, DistinctData<UInt64>>, |
79 | | MethodStringNoCache<DistinctDataWithShortStringKey>, |
80 | | MethodOneNumber<UInt128, DistinctData<UInt128>>, |
81 | | MethodOneNumber<UInt256, DistinctData<UInt256>>, |
82 | | MethodOneNumber<UInt32, DistinctDataPhase2<UInt32>>, |
83 | | MethodOneNumber<UInt64, DistinctDataPhase2<UInt64>>, |
84 | | MethodSingleNullableColumn<MethodOneNumber<UInt8, DataWithNullKey<DistinctData<UInt8>>>>, |
85 | | MethodSingleNullableColumn<MethodOneNumber<UInt16, DataWithNullKey<DistinctData<UInt16>>>>, |
86 | | MethodSingleNullableColumn<MethodOneNumber<UInt32, DataWithNullKey<DistinctData<UInt32>>>>, |
87 | | MethodSingleNullableColumn<MethodOneNumber<UInt64, DataWithNullKey<DistinctData<UInt64>>>>, |
88 | | MethodSingleNullableColumn< |
89 | | MethodOneNumber<UInt32, DataWithNullKey<DistinctDataPhase2<UInt32>>>>, |
90 | | MethodSingleNullableColumn< |
91 | | MethodOneNumber<UInt64, DataWithNullKey<DistinctDataPhase2<UInt64>>>>, |
92 | | MethodSingleNullableColumn< |
93 | | MethodOneNumber<UInt128, DataWithNullKey<DistinctData<UInt128>>>>, |
94 | | MethodSingleNullableColumn< |
95 | | MethodOneNumber<UInt256, DataWithNullKey<DistinctData<UInt256>>>>, |
96 | | MethodSingleNullableColumn< |
97 | | MethodStringNoCache<DataWithNullKey<DistinctDataWithShortStringKey>>>, |
98 | | MethodKeysFixed<DistinctData<UInt64>>, MethodKeysFixed<DistinctData<UInt72>>, |
99 | | MethodKeysFixed<DistinctData<UInt96>>, MethodKeysFixed<DistinctData<UInt104>>, |
100 | | MethodKeysFixed<DistinctData<UInt128>>, MethodKeysFixed<DistinctData<UInt136>>, |
101 | | MethodKeysFixed<DistinctData<UInt256>>>; |
102 | | |
103 | | struct DistinctDataVariants |
104 | | : public DataVariants<DistinctMethodVariants, MethodSingleNullableColumn, MethodOneNumber, |
105 | | DataWithNullKey> { |
106 | 612k | void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) { |
107 | 612k | bool nullable = data_types.size() == 1 && data_types[0]->is_nullable(); |
108 | 612k | switch (type) { |
109 | 28.8k | case HashKeyType::serialized: |
110 | 28.8k | method_variant.emplace<MethodSerialized<DistinctDataWithStringKey>>(); |
111 | 28.8k | break; |
112 | 1.01k | case HashKeyType::int8_key: |
113 | 1.01k | emplace_single<UInt8, DistinctData<UInt8>>(nullable); |
114 | 1.01k | break; |
115 | 232 | case HashKeyType::int16_key: |
116 | 232 | emplace_single<UInt16, DistinctData<UInt16>>(nullable); |
117 | 232 | break; |
118 | 2.15k | case HashKeyType::int32_key: |
119 | 2.15k | emplace_single<UInt32, DistinctData<UInt32>>(nullable); |
120 | 2.15k | break; |
121 | 2.81k | case HashKeyType::int32_key_phase2: |
122 | 2.81k | emplace_single<UInt32, DistinctDataPhase2<UInt32>>(nullable); |
123 | 2.81k | break; |
124 | 272k | case HashKeyType::int64_key: |
125 | 272k | emplace_single<UInt64, DistinctData<UInt64>>(nullable); |
126 | 272k | break; |
127 | 272k | case HashKeyType::int64_key_phase2: |
128 | 272k | emplace_single<UInt64, DistinctDataPhase2<UInt64>>(nullable); |
129 | 272k | break; |
130 | 71 | case HashKeyType::int128_key: |
131 | 71 | emplace_single<UInt128, DistinctData<UInt128>>(nullable); |
132 | 71 | break; |
133 | 14 | case HashKeyType::int256_key: |
134 | 14 | emplace_single<UInt256, DistinctData<UInt256>>(nullable); |
135 | 14 | break; |
136 | 1.49k | case HashKeyType::string_key: |
137 | 1.49k | if (nullable) { |
138 | 1.26k | method_variant.emplace<MethodSingleNullableColumn< |
139 | 1.26k | MethodStringNoCache<DataWithNullKey<DistinctDataWithShortStringKey>>>>(); |
140 | 1.26k | } else { |
141 | 226 | method_variant.emplace<MethodStringNoCache<DistinctDataWithShortStringKey>>(); |
142 | 226 | } |
143 | 1.49k | break; |
144 | 380 | case HashKeyType::fixed64: |
145 | 380 | method_variant.emplace<MethodKeysFixed<DistinctData<UInt64>>>( |
146 | 380 | get_key_sizes(data_types)); |
147 | 380 | break; |
148 | 2.23k | case HashKeyType::fixed72: |
149 | 2.23k | method_variant.emplace<MethodKeysFixed<DistinctData<UInt72>>>( |
150 | 2.23k | get_key_sizes(data_types)); |
151 | 2.23k | break; |
152 | 80 | case HashKeyType::fixed96: |
153 | 80 | method_variant.emplace<MethodKeysFixed<DistinctData<UInt96>>>( |
154 | 80 | get_key_sizes(data_types)); |
155 | 80 | break; |
156 | 2.36k | case HashKeyType::fixed104: |
157 | 2.36k | method_variant.emplace<MethodKeysFixed<DistinctData<UInt104>>>( |
158 | 2.36k | get_key_sizes(data_types)); |
159 | 2.36k | break; |
160 | 280 | case HashKeyType::fixed128: |
161 | 280 | method_variant.emplace<MethodKeysFixed<DistinctData<UInt128>>>( |
162 | 280 | get_key_sizes(data_types)); |
163 | 280 | break; |
164 | 4.72k | case HashKeyType::fixed136: |
165 | 4.72k | method_variant.emplace<MethodKeysFixed<DistinctData<UInt136>>>( |
166 | 4.72k | get_key_sizes(data_types)); |
167 | 4.72k | break; |
168 | 22.1k | case HashKeyType::fixed256: |
169 | 22.1k | method_variant.emplace<MethodKeysFixed<DistinctData<UInt256>>>( |
170 | 22.1k | get_key_sizes(data_types)); |
171 | 22.1k | break; |
172 | 1 | default: |
173 | 1 | throw Exception(ErrorCode::INTERNAL_ERROR, |
174 | 1 | "AggregatedDataVariants meet invalid key type, type={}", type); |
175 | 612k | } |
176 | 612k | } |
177 | | }; |
178 | | |
179 | | } // namespace doris |