Coverage Report

Created: 2026-04-07 14:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exec/common/distinct_agg_utils.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <variant>
21
#include <vector>
22
23
#include "core/arena.h"
24
#include "core/types.h"
25
#include "exec/common/hash_table/hash_map_context.h"
26
#include "exec/common/hash_table/hash_map_util.h"
27
#include "exec/common/hash_table/ph_hash_map.h"
28
#include "exec/common/hash_table/ph_hash_set.h"
29
#include "exec/common/hash_table/string_hash_map.h"
30
31
namespace doris {
32
33
template <typename T>
34
struct DistinctHashSetType {
35
    using HashSet = PHHashSet<T, HashCRC32<T>>;
36
};
37
38
template <>
39
struct DistinctHashSetType<UInt8> {
40
    using HashSet = SmallFixedSizeHashSet<UInt8>;
41
};
42
43
template <>
44
struct DistinctHashSetType<Int8> {
45
    using HashSet = SmallFixedSizeHashSet<Int8>;
46
};
47
48
template <typename T>
49
struct DistinctPhase2HashSetType {
50
    using HashSet = PHHashSet<T, HashMixWrapper<T>>;
51
};
52
53
template <>
54
struct DistinctPhase2HashSetType<UInt8> {
55
    using HashSet = SmallFixedSizeHashSet<UInt8>;
56
};
57
58
template <>
59
struct DistinctPhase2HashSetType<Int8> {
60
    using HashSet = SmallFixedSizeHashSet<Int8>;
61
};
62
63
template <typename T>
64
using DistinctData = typename DistinctHashSetType<T>::HashSet;
65
66
template <typename T>
67
using DistinctDataPhase2 = typename DistinctPhase2HashSetType<T>::HashSet;
68
69
using DistinctDataWithStringKey = PHHashSet<StringRef>;
70
71
// todo: Need to implement StringHashSet like StringHashMap
72
using DistinctDataWithShortStringKey = PHHashSet<StringRef>;
73
74
using DistinctMethodVariants = std::variant<
75
        std::monostate, MethodSerialized<DistinctDataWithStringKey>,
76
        MethodOneNumber<UInt8, DistinctData<UInt8>>, MethodOneNumber<UInt16, DistinctData<UInt16>>,
77
        MethodOneNumber<UInt32, DistinctData<UInt32>>,
78
        MethodOneNumber<UInt64, DistinctData<UInt64>>,
79
        MethodStringNoCache<DistinctDataWithShortStringKey>,
80
        MethodOneNumber<UInt128, DistinctData<UInt128>>,
81
        MethodOneNumber<UInt256, DistinctData<UInt256>>,
82
        MethodOneNumber<UInt32, DistinctDataPhase2<UInt32>>,
83
        MethodOneNumber<UInt64, DistinctDataPhase2<UInt64>>,
84
        MethodSingleNullableColumn<MethodOneNumber<UInt8, DataWithNullKey<DistinctData<UInt8>>>>,
85
        MethodSingleNullableColumn<MethodOneNumber<UInt16, DataWithNullKey<DistinctData<UInt16>>>>,
86
        MethodSingleNullableColumn<MethodOneNumber<UInt32, DataWithNullKey<DistinctData<UInt32>>>>,
87
        MethodSingleNullableColumn<MethodOneNumber<UInt64, DataWithNullKey<DistinctData<UInt64>>>>,
88
        MethodSingleNullableColumn<
89
                MethodOneNumber<UInt32, DataWithNullKey<DistinctDataPhase2<UInt32>>>>,
90
        MethodSingleNullableColumn<
91
                MethodOneNumber<UInt64, DataWithNullKey<DistinctDataPhase2<UInt64>>>>,
92
        MethodSingleNullableColumn<
93
                MethodOneNumber<UInt128, DataWithNullKey<DistinctData<UInt128>>>>,
94
        MethodSingleNullableColumn<
95
                MethodOneNumber<UInt256, DataWithNullKey<DistinctData<UInt256>>>>,
96
        MethodSingleNullableColumn<
97
                MethodStringNoCache<DataWithNullKey<DistinctDataWithShortStringKey>>>,
98
        MethodKeysFixed<DistinctData<UInt64>>, MethodKeysFixed<DistinctData<UInt72>>,
99
        MethodKeysFixed<DistinctData<UInt96>>, MethodKeysFixed<DistinctData<UInt104>>,
100
        MethodKeysFixed<DistinctData<UInt128>>, MethodKeysFixed<DistinctData<UInt136>>,
101
        MethodKeysFixed<DistinctData<UInt256>>>;
102
103
struct DistinctDataVariants
104
        : public DataVariants<DistinctMethodVariants, MethodSingleNullableColumn, MethodOneNumber,
105
                              DataWithNullKey> {
106
445k
    void init(const std::vector<DataTypePtr>& data_types, HashKeyType type) {
107
445k
        bool nullable = data_types.size() == 1 && data_types[0]->is_nullable();
108
445k
        switch (type) {
109
21.1k
        case HashKeyType::serialized:
110
21.1k
            method_variant.emplace<MethodSerialized<DistinctDataWithStringKey>>();
111
21.1k
            break;
112
1.01k
        case HashKeyType::int8_key:
113
1.01k
            emplace_single<UInt8, DistinctData<UInt8>>(nullable);
114
1.01k
            break;
115
159
        case HashKeyType::int16_key:
116
159
            emplace_single<UInt16, DistinctData<UInt16>>(nullable);
117
159
            break;
118
2.00k
        case HashKeyType::int32_key:
119
2.00k
            emplace_single<UInt32, DistinctData<UInt32>>(nullable);
120
2.00k
            break;
121
2.59k
        case HashKeyType::int32_key_phase2:
122
2.59k
            emplace_single<UInt32, DistinctDataPhase2<UInt32>>(nullable);
123
2.59k
            break;
124
201k
        case HashKeyType::int64_key:
125
201k
            emplace_single<UInt64, DistinctData<UInt64>>(nullable);
126
201k
            break;
127
202k
        case HashKeyType::int64_key_phase2:
128
202k
            emplace_single<UInt64, DistinctDataPhase2<UInt64>>(nullable);
129
202k
            break;
130
113
        case HashKeyType::int128_key:
131
113
            emplace_single<UInt128, DistinctData<UInt128>>(nullable);
132
113
            break;
133
8
        case HashKeyType::int256_key:
134
8
            emplace_single<UInt256, DistinctData<UInt256>>(nullable);
135
8
            break;
136
1.92k
        case HashKeyType::string_key:
137
1.92k
            if (nullable) {
138
1.67k
                method_variant.emplace<MethodSingleNullableColumn<
139
1.67k
                        MethodStringNoCache<DataWithNullKey<DistinctDataWithShortStringKey>>>>();
140
1.67k
            } else {
141
251
                method_variant.emplace<MethodStringNoCache<DistinctDataWithShortStringKey>>();
142
251
            }
143
1.92k
            break;
144
472
        case HashKeyType::fixed64:
145
472
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt64>>>(
146
472
                    get_key_sizes(data_types));
147
472
            break;
148
2.28k
        case HashKeyType::fixed72:
149
2.28k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt72>>>(
150
2.28k
                    get_key_sizes(data_types));
151
2.28k
            break;
152
62
        case HashKeyType::fixed96:
153
62
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt96>>>(
154
62
                    get_key_sizes(data_types));
155
62
            break;
156
1.06k
        case HashKeyType::fixed104:
157
1.06k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt104>>>(
158
1.06k
                    get_key_sizes(data_types));
159
1.06k
            break;
160
360
        case HashKeyType::fixed128:
161
360
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt128>>>(
162
360
                    get_key_sizes(data_types));
163
360
            break;
164
2.11k
        case HashKeyType::fixed136:
165
2.11k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt136>>>(
166
2.11k
                    get_key_sizes(data_types));
167
2.11k
            break;
168
7.60k
        case HashKeyType::fixed256:
169
7.60k
            method_variant.emplace<MethodKeysFixed<DistinctData<UInt256>>>(
170
7.60k
                    get_key_sizes(data_types));
171
7.60k
            break;
172
1
        default:
173
1
            throw Exception(ErrorCode::INTERNAL_ERROR,
174
1
                            "AggregatedDataVariants meet invalid key type, type={}", type);
175
445k
        }
176
445k
    }
177
};
178
179
} // namespace doris