Coverage Report

Created: 2026-03-15 15:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/distinct_agg_utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <variant>
21
#include <vector>
22
23
#include "core/arena.h"
24
#include "core/types.h"
25
#include "exec/common/hash_table/hash_map_context.h"
26
#include "exec/common/hash_table/hash_map_util.h"
27
#include "exec/common/hash_table/ph_hash_map.h"
28
#include "exec/common/hash_table/ph_hash_set.h"
29
#include "exec/common/hash_table/string_hash_map.h"
30
31
namespace doris {
32
33
// Type trait that maps a key type T to the hash-set type exposed as
// DistinctData<T>. The primary template uses PHHashSet keyed by T and hashed
// with HashCRC32<T>; UInt8 and Int8 are specialized separately.
template <typename T>
34
struct DistinctHashSetType {
35
    using HashSet = PHHashSet<T, HashCRC32<T>>;
36
};
37
38
// UInt8 keys use SmallFixedSizeHashSet instead of PHHashSet.
// NOTE(review): presumably because the key domain is tiny -- confirm against
// the SmallFixedSizeHashSet definition.
template <>
39
struct DistinctHashSetType<UInt8> {
40
    using HashSet = SmallFixedSizeHashSet<UInt8>;
41
};
42
43
// Int8 keys use SmallFixedSizeHashSet instead of PHHashSet, mirroring the
// UInt8 specialization.
template <>
44
struct DistinctHashSetType<Int8> {
45
    using HashSet = SmallFixedSizeHashSet<Int8>;
46
};
47
48
// Type trait that maps a key type T to the hash-set type exposed as
// DistinctDataPhase2<T>. It differs from DistinctHashSetType only in the hash
// functor of the primary template: HashMixWrapper<T> instead of HashCRC32<T>.
template <typename T>
49
struct DistinctPhase2HashSetType {
50
    using HashSet = PHHashSet<T, HashMixWrapper<T>>;
51
};
52
53
// UInt8 keys use SmallFixedSizeHashSet, the same choice as
// DistinctHashSetType<UInt8>.
template <>
54
struct DistinctPhase2HashSetType<UInt8> {
55
    using HashSet = SmallFixedSizeHashSet<UInt8>;
56
};
57
58
// Int8 keys use SmallFixedSizeHashSet, the same choice as
// DistinctHashSetType<Int8>.
template <>
59
struct DistinctPhase2HashSetType<Int8> {
60
    using HashSet = SmallFixedSizeHashSet<Int8>;
61
};
62
63
// Shorthand for the hash set selected by DistinctHashSetType<T>.
template <typename T>
64
using DistinctData = typename DistinctHashSetType<T>::HashSet;
65
66
// Shorthand for the hash set selected by DistinctPhase2HashSetType<T>.
template <typename T>
67
using DistinctDataPhase2 = typename DistinctPhase2HashSetType<T>::HashSet;
68
69
// Hash set keyed by StringRef; paired with MethodSerialized in
// DistinctMethodVariants.
using DistinctDataWithStringKey = PHHashSet<StringRef>;
70
71
// TODO: implement a StringHashSet analogous to StringHashMap. Until then this
// alias names the same type as DistinctDataWithStringKey (PHHashSet<StringRef>).
72
using DistinctDataWithShortStringKey = PHHashSet<StringRef>;
73
74
// Closed list of hashing-method types that DistinctDataVariants::init() can
// emplace. std::monostate is the first alternative, so a default-constructed
// variant of this type holds no method.
using DistinctMethodVariants = std::variant<
75
        std::monostate, MethodSerialized<DistinctDataWithStringKey>,
76
        // Single-column keys without a null wrapper.
        MethodOneNumber<UInt8, DistinctData<UInt8>>, MethodOneNumber<UInt16, DistinctData<UInt16>>,
77
        MethodOneNumber<UInt32, DistinctData<UInt32>>,
78
        MethodOneNumber<UInt64, DistinctData<UInt64>>,
79
        MethodStringNoCache<DistinctDataWithShortStringKey>,
80
        MethodOneNumber<UInt128, DistinctData<UInt128>>,
81
        MethodOneNumber<UInt256, DistinctData<UInt256>>,
82
        MethodOneNumber<UInt32, DistinctDataPhase2<UInt32>>,
83
        MethodOneNumber<UInt64, DistinctDataPhase2<UInt64>>,
84
        // The same single-column keys wrapped in MethodSingleNullableColumn /
        // DataWithNullKey.
        MethodSingleNullableColumn<MethodOneNumber<UInt8, DataWithNullKey<DistinctData<UInt8>>>>,
85
        MethodSingleNullableColumn<MethodOneNumber<UInt16, DataWithNullKey<DistinctData<UInt16>>>>,
86
        MethodSingleNullableColumn<MethodOneNumber<UInt32, DataWithNullKey<DistinctData<UInt32>>>>,
87
        MethodSingleNullableColumn<MethodOneNumber<UInt64, DataWithNullKey<DistinctData<UInt64>>>>,
88
        MethodSingleNullableColumn<
89
                MethodOneNumber<UInt32, DataWithNullKey<DistinctDataPhase2<UInt32>>>>,
90
        MethodSingleNullableColumn<
91
                MethodOneNumber<UInt64, DataWithNullKey<DistinctDataPhase2<UInt64>>>>,
92
        MethodSingleNullableColumn<
93
                MethodOneNumber<UInt128, DataWithNullKey<DistinctData<UInt128>>>>,
94
        MethodSingleNullableColumn<
95
                MethodOneNumber<UInt256, DataWithNullKey<DistinctData<UInt256>>>>,
96
        MethodSingleNullableColumn<
97
                MethodStringNoCache<DataWithNullKey<DistinctDataWithShortStringKey>>>,
98
        // Fixed-width keys, one alternative per width handled by
        // HashKeyType::fixed64 .. fixed256 in DistinctDataVariants::init().
        MethodKeysFixed<DistinctData<UInt64>>, MethodKeysFixed<DistinctData<UInt72>>,
99
        MethodKeysFixed<DistinctData<UInt96>>, MethodKeysFixed<DistinctData<UInt104>>,
100
        MethodKeysFixed<DistinctData<UInt128>>, MethodKeysFixed<DistinctData<UInt136>>,
101
        MethodKeysFixed<DistinctData<UInt256>>>;
102
103
// Instantiates DataVariants with DistinctMethodVariants as the variant type.
// init() decides which alternative of method_variant becomes active.
struct DistinctDataVariants
104
        : public DataVariants<DistinctMethodVariants, MethodSingleNullableColumn, MethodOneNumber,
105
                              DataWithNullKey> {
106
509k
    // Emplaces into method_variant the hashing method matching `type`.
    //
    // data_types: key column types. Read to compute `nullable` (exactly one
    //             column and that column is nullable) and, for the fixedN
    //             cases, forwarded to get_key_sizes().
    // type:       selects the key layout / method alternative.
    //
    // Throws Exception(ErrorCode::INTERNAL_ERROR) when `type` matches none of
    // the cases below.
    void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) {
107
509k
        // Only a single nullable key column counts as nullable here; the
        // serialized and fixedN cases do not consult this flag.
        bool nullable = data_types.size() == 1 && data_types[0]->is_nullable();
108
509k
        switch (type) {
109
29.3k
        case HashKeyType::serialized:
110
29.3k
            method_variant.emplace<MethodSerialized<DistinctDataWithStringKey>>();
111
29.3k
            break;
112
984
        // Numeric single-column keys. emplace_single is inherited and not
        // visible here; presumably it picks the MethodSingleNullableColumn
        // alternative when `nullable` is true -- confirm in DataVariants.
        case HashKeyType::int8_key:
113
984
            emplace_single<UInt8, DistinctData<UInt8>>(nullable);
114
984
            break;
115
200
        case HashKeyType::int16_key:
116
200
            emplace_single<UInt16, DistinctData<UInt16>>(nullable);
117
200
            break;
118
1.09k
        case HashKeyType::int32_key:
119
1.09k
            emplace_single<UInt32, DistinctData<UInt32>>(nullable);
120
1.09k
            break;
121
1.67k
        // The *_phase2 key types use the DistinctDataPhase2 sets (HashMixWrapper
        // hashing) instead of DistinctData.
        case HashKeyType::int32_key_phase2:
122
1.67k
            emplace_single<UInt32, DistinctDataPhase2<UInt32>>(nullable);
123
1.67k
            break;
124
223k
        case HashKeyType::int64_key:
125
223k
            emplace_single<UInt64, DistinctData<UInt64>>(nullable);
126
223k
            break;
127
223k
        case HashKeyType::int64_key_phase2:
128
223k
            emplace_single<UInt64, DistinctDataPhase2<UInt64>>(nullable);
129
223k
            break;
130
72
        case HashKeyType::int128_key:
131
72
            emplace_single<UInt128, DistinctData<UInt128>>(nullable);
132
72
            break;
133
6
        case HashKeyType::int256_key:
134
6
            emplace_single<UInt256, DistinctData<UInt256>>(nullable);
135
6
            break;
136
1.21k
        // String keys are dispatched by hand rather than through
        // emplace_single, since they use MethodStringNoCache.
        case HashKeyType::string_key:
137
1.21k
            if (nullable) {
138
901
                method_variant.emplace<MethodSingleNullableColumn<
139
901
                        MethodStringNoCache<DataWithNullKey<DistinctDataWithShortStringKey>>>>();
140
901
            } else {
141
317
                method_variant.emplace<MethodStringNoCache<DistinctDataWithShortStringKey>>();
142
317
            }
143
1.21k
            break;
144
622
        // Fixed-width keys: each method is constructed from the result of
        // get_key_sizes(data_types).
        case HashKeyType::fixed64:
145
622
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt64>>>(
146
622
                    get_key_sizes(data_types));
147
622
            break;
148
981
        case HashKeyType::fixed72:
149
981
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt72>>>(
150
981
                    get_key_sizes(data_types));
151
981
            break;
152
118
        case HashKeyType::fixed96:
153
118
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt96>>>(
154
118
                    get_key_sizes(data_types));
155
118
            break;
156
1.79k
        case HashKeyType::fixed104:
157
1.79k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt104>>>(
158
1.79k
                    get_key_sizes(data_types));
159
1.79k
            break;
160
414
        case HashKeyType::fixed128:
161
414
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt128>>>(
162
414
                    get_key_sizes(data_types));
163
414
            break;
164
4.63k
        case HashKeyType::fixed136:
165
4.63k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt136>>>(
166
4.63k
                    get_key_sizes(data_types));
167
4.63k
            break;
168
21.8k
        case HashKeyType::fixed256:
169
21.8k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt256>>>(
170
21.8k
                    get_key_sizes(data_types));
171
21.8k
            break;
172
1
        // NOTE(review): the message below names AggregatedDataVariants although
        // this struct is DistinctDataVariants -- looks copied from a sibling
        // type; confirm before relying on this text when reading logs.
        default:
173
1
            throw Exception(ErrorCode::INTERNAL_ERROR,
174
1
                            "AggregatedDataVariants meet invalid key type, type={}", type);
175
509k
        }
176
509k
    }
177
};
178
179
} // namespace doris