Coverage Report

Created: 2026-03-13 14:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/distinct_agg_utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <variant>
21
#include <vector>
22
23
#include "core/arena.h"
24
#include "core/types.h"
25
#include "exec/common/hash_table/hash_map_context.h"
26
#include "exec/common/hash_table/hash_map_util.h"
27
#include "exec/common/hash_table/ph_hash_map.h"
28
#include "exec/common/hash_table/ph_hash_set.h"
29
#include "exec/common/hash_table/string_hash_map.h"
30
31
namespace doris {
32
33
// Maps a key type T to the hash-set type used to hold its distinct values.
// Generic case: a PHHashSet keyed by T and hashed with HashCRC32<T>.
// Explicit specializations for UInt8 and Int8 follow.
template <typename T>
34
struct DistinctHashSetType {
35
    using HashSet = PHHashSet<T, HashCRC32<T>>;
36
};
37
38
// UInt8 keys use SmallFixedSizeHashSet instead of the generic PHHashSet.
// NOTE(review): presumably because the key domain is only 256 values — confirm
// against SmallFixedSizeHashSet's definition.
template <>
39
struct DistinctHashSetType<UInt8> {
40
    using HashSet = SmallFixedSizeHashSet<UInt8>;
41
};
42
43
// Int8 keys use SmallFixedSizeHashSet, mirroring the UInt8 specialization.
template <>
44
struct DistinctHashSetType<Int8> {
45
    using HashSet = SmallFixedSizeHashSet<Int8>;
46
};
47
48
// Hash-set selector for the "phase2" key types (see the *_phase2 cases in
// DistinctDataVariants::init). Identical in shape to DistinctHashSetType, but
// the generic case hashes with HashMixWrapper<T> rather than HashCRC32<T>.
// NOTE(review): the reason for a different hash function is not stated in this
// file — presumably to avoid reusing the hash applied in an earlier phase; confirm.
template <typename T>
49
struct DistinctPhase2HashSetType {
50
    using HashSet = PHHashSet<T, HashMixWrapper<T>>;
51
};
52
53
// UInt8 keys use SmallFixedSizeHashSet in phase 2 as well (same type as the
// DistinctHashSetType<UInt8> specialization).
template <>
54
struct DistinctPhase2HashSetType<UInt8> {
55
    using HashSet = SmallFixedSizeHashSet<UInt8>;
56
};
57
58
// Int8 keys use SmallFixedSizeHashSet in phase 2 as well (same type as the
// DistinctHashSetType<Int8> specialization).
template <>
59
struct DistinctPhase2HashSetType<Int8> {
60
    using HashSet = SmallFixedSizeHashSet<Int8>;
61
};
62
63
// Shorthand for the hash set chosen by DistinctHashSetType for key type T.
template <typename T>
64
using DistinctData = typename DistinctHashSetType<T>::HashSet;
65
66
// Shorthand for the hash set chosen by DistinctPhase2HashSetType for key type T.
template <typename T>
67
using DistinctDataPhase2 = typename DistinctPhase2HashSetType<T>::HashSet;
68
69
// Hash set keyed by StringRef; used with MethodSerialized (HashKeyType::serialized).
using DistinctDataWithStringKey = PHHashSet<StringRef>;
70
71
// Hash set used for string keys (HashKeyType::string_key).
// TODO: implement a dedicated StringHashSet, analogous to StringHashMap. Until
// then this is the same type as DistinctDataWithStringKey.
72
using DistinctDataWithShortStringKey = PHHashSet<StringRef>;
73
74
// Closed set of hash-method types that DistinctDataVariants can hold.
// std::monostate is the first alternative, so a default-constructed variant
// holds no method. The remaining alternatives cover: serialized keys, single
// numeric keys (including the phase2 variants for UInt32/UInt64), string keys,
// the single-nullable-column wrappers of those, and fixed-width packed keys.
// Every type emplaced by DistinctDataVariants::init must be listed here.
using DistinctMethodVariants = std::variant<
75
        std::monostate, MethodSerialized<DistinctDataWithStringKey>,
76
        MethodOneNumber<UInt8, DistinctData<UInt8>>, MethodOneNumber<UInt16, DistinctData<UInt16>>,
77
        MethodOneNumber<UInt32, DistinctData<UInt32>>,
78
        MethodOneNumber<UInt64, DistinctData<UInt64>>,
79
        MethodStringNoCache<DistinctDataWithShortStringKey>,
80
        MethodOneNumber<UInt128, DistinctData<UInt128>>,
81
        MethodOneNumber<UInt256, DistinctData<UInt256>>,
82
        MethodOneNumber<UInt32, DistinctDataPhase2<UInt32>>,
83
        MethodOneNumber<UInt64, DistinctDataPhase2<UInt64>>,
84
        MethodSingleNullableColumn<MethodOneNumber<UInt8, DataWithNullKey<DistinctData<UInt8>>>>,
85
        MethodSingleNullableColumn<MethodOneNumber<UInt16, DataWithNullKey<DistinctData<UInt16>>>>,
86
        MethodSingleNullableColumn<MethodOneNumber<UInt32, DataWithNullKey<DistinctData<UInt32>>>>,
87
        MethodSingleNullableColumn<MethodOneNumber<UInt64, DataWithNullKey<DistinctData<UInt64>>>>,
88
        MethodSingleNullableColumn<
89
                MethodOneNumber<UInt32, DataWithNullKey<DistinctDataPhase2<UInt32>>>>,
90
        MethodSingleNullableColumn<
91
                MethodOneNumber<UInt64, DataWithNullKey<DistinctDataPhase2<UInt64>>>>,
92
        MethodSingleNullableColumn<
93
                MethodOneNumber<UInt128, DataWithNullKey<DistinctData<UInt128>>>>,
94
        MethodSingleNullableColumn<
95
                MethodOneNumber<UInt256, DataWithNullKey<DistinctData<UInt256>>>>,
96
        MethodSingleNullableColumn<
97
                MethodStringNoCache<DataWithNullKey<DistinctDataWithShortStringKey>>>,
98
        MethodKeysFixed<DistinctData<UInt64>>, MethodKeysFixed<DistinctData<UInt72>>,
99
        MethodKeysFixed<DistinctData<UInt96>>, MethodKeysFixed<DistinctData<UInt104>>,
100
        MethodKeysFixed<DistinctData<UInt128>>, MethodKeysFixed<DistinctData<UInt136>>,
101
        MethodKeysFixed<DistinctData<UInt256>>>;
102
103
// Holder for the hash method used by distinct aggregation. It derives from
// DataVariants instantiated with DistinctMethodVariants and adds init(), which
// picks the concrete method stored in `method_variant`.
struct DistinctDataVariants
104
        : public DataVariants<DistinctMethodVariants, MethodSingleNullableColumn, MethodOneNumber,
105
                              DataWithNullKey> {
106
498k
    // Selects and constructs the hash method matching `type`.
    //
    // data_types: types of the key columns. Used to decide nullability and, for
    //             the fixed* key types, passed to get_key_sizes().
    // type:       the key layout to use.
    //
    // Throws Exception(ErrorCode::INTERNAL_ERROR) when `type` is not one of the
    // key types handled below.
    void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) {
107
498k
        // Only a single key column that is itself nullable counts as nullable here.
        bool nullable = data_types.size() == 1 && data_types[0]->is_nullable();
108
498k
        switch (type) {
109
29.2k
        case HashKeyType::serialized:
110
29.2k
            method_variant.emplace<MethodSerialized<DistinctDataWithStringKey>>();
111
29.2k
            break;
112
1.71k
        // Single numeric key: emplace_single (not defined in this file; presumably
        // inherited from DataVariants) receives the `nullable` flag.
        case HashKeyType::int8_key:
113
1.71k
            emplace_single<UInt8, DistinctData<UInt8>>(nullable);
114
1.71k
            break;
115
277
        case HashKeyType::int16_key:
116
277
            emplace_single<UInt16, DistinctData<UInt16>>(nullable);
117
277
            break;
118
1.26k
        case HashKeyType::int32_key:
119
1.26k
            emplace_single<UInt32, DistinctData<UInt32>>(nullable);
120
1.26k
            break;
121
1.91k
        case HashKeyType::int32_key_phase2:
122
1.91k
            emplace_single<UInt32, DistinctDataPhase2<UInt32>>(nullable);
123
1.91k
            break;
124
216k
        case HashKeyType::int64_key:
125
216k
            emplace_single<UInt64, DistinctData<UInt64>>(nullable);
126
216k
            break;
127
216k
        case HashKeyType::int64_key_phase2:
128
216k
            emplace_single<UInt64, DistinctDataPhase2<UInt64>>(nullable);
129
216k
            break;
130
172
        case HashKeyType::int128_key:
131
172
            emplace_single<UInt128, DistinctData<UInt128>>(nullable);
132
172
            break;
133
34
        case HashKeyType::int256_key:
134
34
            emplace_single<UInt256, DistinctData<UInt256>>(nullable);
135
34
            break;
136
2.34k
        // String key: the nullable and non-nullable method types are spelled out
        // here rather than going through emplace_single.
        case HashKeyType::string_key:
137
2.34k
            if (nullable) {
138
2.03k
                method_variant.emplace<MethodSingleNullableColumn<
139
2.03k
                        MethodStringNoCache<DataWithNullKey<DistinctDataWithShortStringKey>>>>();
140
2.03k
            } else {
141
309
                method_variant.emplace<MethodStringNoCache<DistinctDataWithShortStringKey>>();
142
309
            }
143
2.34k
            break;
144
572
        // Fixed-width keys: the method is constructed from get_key_sizes(data_types);
        // `nullable` is not consulted for these cases.
        case HashKeyType::fixed64:
145
572
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt64>>>(
146
572
                    get_key_sizes(data_types));
147
572
            break;
148
926
        case HashKeyType::fixed72:
149
926
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt72>>>(
150
926
                    get_key_sizes(data_types));
151
926
            break;
152
56
        case HashKeyType::fixed96:
153
56
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt96>>>(
154
56
                    get_key_sizes(data_types));
155
56
            break;
156
1.75k
        case HashKeyType::fixed104:
157
1.75k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt104>>>(
158
1.75k
                    get_key_sizes(data_types));
159
1.75k
            break;
160
366
        case HashKeyType::fixed128:
161
366
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt128>>>(
162
366
                    get_key_sizes(data_types));
163
366
            break;
164
4.66k
        case HashKeyType::fixed136:
165
4.66k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt136>>>(
166
4.66k
                    get_key_sizes(data_types));
167
4.66k
            break;
168
21.8k
        case HashKeyType::fixed256:
169
21.8k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt256>>>(
170
21.8k
                    get_key_sizes(data_types));
171
21.8k
            break;
172
1
        // Any other key type is unsupported for distinct aggregation. The message
        // names this class so the failure is attributed to the right variants type.
        default:
173
1
            throw Exception(ErrorCode::INTERNAL_ERROR,
174
1
                            "DistinctDataVariants meet invalid key type, type={}", type);
175
498k
        }
176
498k
    }
177
};
178
179
} // namespace doris