Coverage Report

Created: 2026-03-13 03:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/distinct_agg_utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <variant>
21
#include <vector>
22
23
#include "core/arena.h"
24
#include "core/types.h"
25
#include "exec/common/hash_table/hash_map_context.h"
26
#include "exec/common/hash_table/hash_map_util.h"
27
#include "exec/common/hash_table/ph_hash_map.h"
28
#include "exec/common/hash_table/ph_hash_set.h"
29
#include "exec/common/hash_table/string_hash_map.h"
30
31
namespace doris {
32
33
// Trait that names the hash-set type used for distinct keys of type T.
// The primary template yields PHHashSet<T, HashCRC32<T>>; the UInt8 and Int8
// specializations below yield SmallFixedSizeHashSet instead.
template <typename T>
34
struct DistinctHashSetType {
35
    using HashSet = PHHashSet<T, HashCRC32<T>>;
36
};
37
38
// UInt8 keys: SmallFixedSizeHashSet<UInt8> replaces the PHHashSet default.
template <>
39
struct DistinctHashSetType<UInt8> {
40
    using HashSet = SmallFixedSizeHashSet<UInt8>;
41
};
42
43
// Int8 keys: SmallFixedSizeHashSet<Int8> replaces the PHHashSet default.
template <>
44
struct DistinctHashSetType<Int8> {
45
    using HashSet = SmallFixedSizeHashSet<Int8>;
46
};
47
48
// Trait that names the hash-set type behind DistinctDataPhase2<T>.
// The primary template yields PHHashSet<T, HashMixWrapper<T>>, i.e. it differs
// from DistinctHashSetType only in the hasher; the UInt8 and Int8
// specializations yield the same SmallFixedSizeHashSet types as that trait.
template <typename T>
49
struct DistinctPhase2HashSetType {
50
    using HashSet = PHHashSet<T, HashMixWrapper<T>>;
51
};
52
53
// UInt8 keys: SmallFixedSizeHashSet<UInt8>.
template <>
54
struct DistinctPhase2HashSetType<UInt8> {
55
    using HashSet = SmallFixedSizeHashSet<UInt8>;
56
};
57
58
// Int8 keys: SmallFixedSizeHashSet<Int8>.
template <>
59
struct DistinctPhase2HashSetType<Int8> {
60
    using HashSet = SmallFixedSizeHashSet<Int8>;
61
};
62
63
// Hash-set type for distinct keys of type T, resolved via DistinctHashSetType<T>.
template <typename T>
64
using DistinctData = typename DistinctHashSetType<T>::HashSet;
65
66
// Hash-set type for distinct keys of type T, resolved via DistinctPhase2HashSetType<T>.
template <typename T>
67
using DistinctDataPhase2 = typename DistinctPhase2HashSetType<T>::HashSet;
68
69
// Hash set keyed by StringRef; used with MethodSerialized in DistinctMethodVariants.
using DistinctDataWithStringKey = PHHashSet<StringRef>;
70
71
// TODO: implement a StringHashSet (like StringHashMap); until then this is the same type as DistinctDataWithStringKey.
72
using DistinctDataWithShortStringKey = PHHashSet<StringRef>;
73
74
// Closed set of hash-method types that DistinctDataVariants can hold.
// std::monostate is the first alternative, so a default-constructed variant
// holds no method. The remaining alternatives are, in order:
//   - MethodSerialized over DistinctDataWithStringKey;
//   - MethodOneNumber / MethodStringNoCache over the plain hash sets
//     (including the DistinctDataPhase2 sets for UInt32 and UInt64);
//   - the same methods wrapped in MethodSingleNullableColumn, with the hash
//     set wrapped in DataWithNullKey;
//   - MethodKeysFixed over DistinctData of UInt64 .. UInt256.
using DistinctMethodVariants = std::variant<
75
        std::monostate, MethodSerialized<DistinctDataWithStringKey>,
76
        MethodOneNumber<UInt8, DistinctData<UInt8>>, MethodOneNumber<UInt16, DistinctData<UInt16>>,
77
        MethodOneNumber<UInt32, DistinctData<UInt32>>,
78
        MethodOneNumber<UInt64, DistinctData<UInt64>>,
79
        MethodStringNoCache<DistinctDataWithShortStringKey>,
80
        MethodOneNumber<UInt128, DistinctData<UInt128>>,
81
        MethodOneNumber<UInt256, DistinctData<UInt256>>,
82
        MethodOneNumber<UInt32, DistinctDataPhase2<UInt32>>,
83
        MethodOneNumber<UInt64, DistinctDataPhase2<UInt64>>,
84
        MethodSingleNullableColumn<MethodOneNumber<UInt8, DataWithNullKey<DistinctData<UInt8>>>>,
85
        MethodSingleNullableColumn<MethodOneNumber<UInt16, DataWithNullKey<DistinctData<UInt16>>>>,
86
        MethodSingleNullableColumn<MethodOneNumber<UInt32, DataWithNullKey<DistinctData<UInt32>>>>,
87
        MethodSingleNullableColumn<MethodOneNumber<UInt64, DataWithNullKey<DistinctData<UInt64>>>>,
88
        MethodSingleNullableColumn<
89
                MethodOneNumber<UInt32, DataWithNullKey<DistinctDataPhase2<UInt32>>>>,
90
        MethodSingleNullableColumn<
91
                MethodOneNumber<UInt64, DataWithNullKey<DistinctDataPhase2<UInt64>>>>,
92
        MethodSingleNullableColumn<
93
                MethodOneNumber<UInt128, DataWithNullKey<DistinctData<UInt128>>>>,
94
        MethodSingleNullableColumn<
95
                MethodOneNumber<UInt256, DataWithNullKey<DistinctData<UInt256>>>>,
96
        MethodSingleNullableColumn<
97
                MethodStringNoCache<DataWithNullKey<DistinctDataWithShortStringKey>>>,
98
        MethodKeysFixed<DistinctData<UInt64>>, MethodKeysFixed<DistinctData<UInt72>>,
99
        MethodKeysFixed<DistinctData<UInt96>>, MethodKeysFixed<DistinctData<UInt104>>,
100
        MethodKeysFixed<DistinctData<UInt128>>, MethodKeysFixed<DistinctData<UInt136>>,
101
        MethodKeysFixed<DistinctData<UInt256>>>;
102
103
// Holder for one of the DistinctMethodVariants alternatives, built on the
// DataVariants base template. `method_variant`, `emplace_single` and
// `get_key_sizes` used below are not declared in this struct; presumably they
// come from DataVariants or an included header - confirm there.
struct DistinctDataVariants
104
        : public DataVariants<DistinctMethodVariants, MethodSingleNullableColumn, MethodOneNumber,
105
                              DataWithNullKey> {
106
612k
    // Emplaces into method_variant the alternative that corresponds to `type`.
    //
    // data_types: key column types. Used to compute `nullable` (true only when
    //             there is exactly one type and it is nullable) and passed to
    //             get_key_sizes() for the fixed* key types.
    // type:       key kind selecting the alternative.
    //
    // Throws Exception(ErrorCode::INTERNAL_ERROR) for any `type` that has no
    // case below.
    void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) {
107
612k
        // Only a single nullable key column counts as nullable here.
        bool nullable = data_types.size() == 1 && data_types[0]->is_nullable();
108
612k
        switch (type) {
109
28.8k
        // `nullable` is not consulted for serialized keys.
        case HashKeyType::serialized:
110
28.8k
            method_variant.emplace<MethodSerialized<DistinctDataWithStringKey>>();
111
28.8k
            break;
112
1.01k
        // Single numeric key: emplace_single receives `nullable` and the key/set types.
        case HashKeyType::int8_key:
113
1.01k
            emplace_single<UInt8, DistinctData<UInt8>>(nullable);
114
1.01k
            break;
115
232
        case HashKeyType::int16_key:
116
232
            emplace_single<UInt16, DistinctData<UInt16>>(nullable);
117
232
            break;
118
2.15k
        case HashKeyType::int32_key:
119
2.15k
            emplace_single<UInt32, DistinctData<UInt32>>(nullable);
120
2.15k
            break;
121
2.81k
        // The *_phase2 key types use the DistinctDataPhase2 hash sets.
        case HashKeyType::int32_key_phase2:
122
2.81k
            emplace_single<UInt32, DistinctDataPhase2<UInt32>>(nullable);
123
2.81k
            break;
124
272k
        case HashKeyType::int64_key:
125
272k
            emplace_single<UInt64, DistinctData<UInt64>>(nullable);
126
272k
            break;
127
272k
        case HashKeyType::int64_key_phase2:
128
272k
            emplace_single<UInt64, DistinctDataPhase2<UInt64>>(nullable);
129
272k
            break;
130
71
        case HashKeyType::int128_key:
131
71
            emplace_single<UInt128, DistinctData<UInt128>>(nullable);
132
71
            break;
133
14
        case HashKeyType::int256_key:
134
14
            emplace_single<UInt256, DistinctData<UInt256>>(nullable);
135
14
            break;
136
1.49k
        // String keys pick the nullable or plain MethodStringNoCache alternative
        // inline instead of going through emplace_single.
        case HashKeyType::string_key:
137
1.49k
            if (nullable) {
138
1.26k
                method_variant.emplace<MethodSingleNullableColumn<
139
1.26k
                        MethodStringNoCache<DataWithNullKey<DistinctDataWithShortStringKey>>>>();
140
1.26k
            } else {
141
226
                method_variant.emplace<MethodStringNoCache<DistinctDataWithShortStringKey>>();
142
226
            }
143
1.49k
            break;
144
380
        // fixed* key types: MethodKeysFixed is constructed from
        // get_key_sizes(data_types); `nullable` is not consulted.
        case HashKeyType::fixed64:
145
380
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt64>>>(
146
380
                    get_key_sizes(data_types));
147
380
            break;
148
2.23k
        case HashKeyType::fixed72:
149
2.23k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt72>>>(
150
2.23k
                    get_key_sizes(data_types));
151
2.23k
            break;
152
80
        case HashKeyType::fixed96:
153
80
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt96>>>(
154
80
                    get_key_sizes(data_types));
155
80
            break;
156
2.36k
        case HashKeyType::fixed104:
157
2.36k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt104>>>(
158
2.36k
                    get_key_sizes(data_types));
159
2.36k
            break;
160
280
        case HashKeyType::fixed128:
161
280
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt128>>>(
162
280
                    get_key_sizes(data_types));
163
280
            break;
164
4.72k
        case HashKeyType::fixed136:
165
4.72k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt136>>>(
166
4.72k
                    get_key_sizes(data_types));
167
4.72k
            break;
168
22.1k
        case HashKeyType::fixed256:
169
22.1k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt256>>>(
170
22.1k
                    get_key_sizes(data_types));
171
22.1k
            break;
172
1
        // Any key type without a case above is rejected.
        // NOTE(review): the message names "AggregatedDataVariants" although this
        // struct is DistinctDataVariants - looks like a copy/paste leftover;
        // confirm nothing matches on the text before renaming.
        default:
173
1
            throw Exception(ErrorCode::INTERNAL_ERROR,
174
1
                            "AggregatedDataVariants meet invalid key type, type={}", type);
175
612k
        }
176
612k
    }
177
};
178
179
} // namespace doris