be/src/exprs/aggregate/aggregate_function_statistic.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | #include <cmath> |
20 | | #include <cstdint> |
21 | | #include <string> |
22 | | |
23 | | #include "common/exception.h" |
24 | | #include "common/status.h" |
25 | | #include "core/assert_cast.h" |
26 | | #include "core/column/column_nullable.h" |
27 | | #include "core/column/column_vector.h" |
28 | | #include "core/data_type/data_type.h" |
29 | | #include "core/data_type/data_type_nullable.h" |
30 | | #include "core/data_type/data_type_number.h" |
31 | | #include "core/types.h" |
32 | | #include "exprs/aggregate/aggregate_function.h" |
33 | | #include "exprs/aggregate/moments.h" |
34 | | |
35 | | namespace doris { |
36 | | |
/// Selects which moment-based statistic an aggregate function instance produces.
enum class STATISTICS_FUNCTION_KIND : uint8_t {
    SKEW_POP, // third moment divided by population variance ^ 1.5
    KURT_POP, // fourth moment divided by population variance ^ 2, minus 3
};
38 | | |
39 | 0 | inline std::string to_string(STATISTICS_FUNCTION_KIND kind) { |
40 | 0 | switch (kind) { |
41 | 0 | case STATISTICS_FUNCTION_KIND::SKEW_POP: |
42 | 0 | return "skewness"; |
43 | 0 | case STATISTICS_FUNCTION_KIND::KURT_POP: |
44 | 0 | return "kurtosis"; |
45 | 0 | default: |
46 | 0 | return "Unknown"; |
47 | 0 | } |
48 | 0 | } |
49 | | |
50 | | template <PrimitiveType T, std::size_t _level> |
51 | | struct StatFuncOneArg { |
52 | | static constexpr PrimitiveType Type = T; |
53 | | using Data = VarMoments<Float64, _level>; |
54 | | using DataType = Float64; |
55 | | }; |
56 | | |
57 | | template <typename StatFunc, bool NullableInput> |
58 | | class AggregateFunctionVarianceSimple |
59 | | : public IAggregateFunctionDataHelper< |
60 | | typename StatFunc::Data, |
61 | | AggregateFunctionVarianceSimple<StatFunc, NullableInput>> { |
62 | | public: |
63 | | using InputCol = ColumnVector<StatFunc::Type>; |
64 | | using ResultCol = ColumnFloat64; |
65 | | using InputType = typename StatFunc::DataType; |
66 | | |
67 | | explicit AggregateFunctionVarianceSimple(STATISTICS_FUNCTION_KIND kind_, |
68 | | const DataTypes& argument_types_) |
69 | 0 | : IAggregateFunctionDataHelper< |
70 | 0 | typename StatFunc::Data, |
71 | 0 | AggregateFunctionVarianceSimple<StatFunc, NullableInput>>(argument_types_), |
72 | 0 | kind(kind_) { |
73 | 0 | DCHECK(!argument_types_.empty()); |
74 | 0 | } Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE |
75 | | |
76 | 0 | String get_name() const override { return to_string(kind); }Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE8get_nameB5cxx11Ev |
77 | | |
78 | 0 | DataTypePtr get_return_type() const override { |
79 | 0 | return make_nullable(std::make_shared<DataTypeFloat64>()); |
80 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE15get_return_typeEv |
81 | | |
82 | | void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, |
83 | 0 | Arena&) const override { |
84 | 0 | if constexpr (NullableInput) { |
85 | 0 | const ColumnNullable& column_with_nullable = |
86 | 0 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(*columns[0]); |
87 | |
|
88 | 0 | if (column_with_nullable.is_null_at(row_num)) { |
89 | 0 | return; |
90 | 0 | } else { |
91 | 0 | this->data(place).add( |
92 | 0 | (InputType)assert_cast<const InputCol&, TypeCheckOnRelease::DISABLE>( |
93 | 0 | column_with_nullable.get_nested_column()) |
94 | 0 | .get_data()[row_num]); |
95 | 0 | } |
96 | |
|
97 | 0 | } else { |
98 | 0 | this->data(place).add( |
99 | 0 | (InputType)assert_cast<const InputCol&, TypeCheckOnRelease::DISABLE>( |
100 | 0 | *columns[0]) |
101 | 0 | .get_data()[row_num]); |
102 | 0 | } |
103 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE3addEPcPPKNS_7IColumnElRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE3addEPcPPKNS_7IColumnElRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE3addEPcPPKNS_7IColumnElRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE3addEPcPPKNS_7IColumnElRNS_5ArenaE |
104 | | |
105 | | void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, |
106 | 0 | Arena&) const override { |
107 | 0 | this->data(place).merge(this->data(rhs)); |
108 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE5mergeEPcPKcRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE5mergeEPcPKcRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE5mergeEPcPKcRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE5mergeEPcPKcRNS_5ArenaE |
109 | | |
110 | 0 | void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { |
111 | 0 | this->data(place).write(buf); |
112 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE9serializeEPKcRNS_14BufferWritableE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE9serializeEPKcRNS_14BufferWritableE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE9serializeEPKcRNS_14BufferWritableE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE9serializeEPKcRNS_14BufferWritableE |
113 | | |
114 | | void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, |
115 | 0 | Arena&) const override { |
116 | 0 | this->data(place).read(buf); |
117 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE |
118 | | |
119 | 0 | void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { |
120 | 0 | const auto& data = this->data(place); |
121 | 0 | ColumnNullable& dst_column_with_nullable = assert_cast<ColumnNullable&>(to); |
122 | 0 | ResultCol* dst_column = |
123 | 0 | assert_cast<ResultCol*>(&(dst_column_with_nullable.get_nested_column())); |
124 | |
|
125 | 0 | switch (kind) { |
126 | 0 | case STATISTICS_FUNCTION_KIND::SKEW_POP: { |
127 | | // If input is empty set, we will get NAN from get_population() |
128 | 0 | Float64 var_value = data.get_population(); |
129 | 0 | Float64 moments_3 = data.get_moment_3(); |
130 | |
|
131 | 0 | if (std::isnan(var_value) || std::isnan(moments_3) || var_value <= 0) { |
132 | 0 | dst_column_with_nullable.get_null_map_data().push_back(1); |
133 | 0 | dst_column->insert_default(); |
134 | 0 | } else { |
135 | 0 | dst_column_with_nullable.get_null_map_data().push_back(0); |
136 | 0 | dst_column->get_data().push_back( |
137 | 0 | static_cast<Float64>(moments_3 / pow(var_value, 1.5))); |
138 | 0 | } |
139 | 0 | break; |
140 | 0 | } |
141 | 0 | case STATISTICS_FUNCTION_KIND::KURT_POP: { |
142 | 0 | Float64 var_value = data.get_population(); |
143 | 0 | Float64 moments_4 = data.get_moment_4(); |
144 | |
|
145 | 0 | if (std::isnan(var_value) || std::isnan(moments_4) || var_value <= 0) { |
146 | 0 | dst_column_with_nullable.get_null_map_data().push_back(1); |
147 | 0 | dst_column->insert_default(); |
148 | 0 | } else { |
149 | 0 | dst_column_with_nullable.get_null_map_data().push_back(0); |
150 | | // kurtosis = E(X^4) / E(X^2)^2 - 3 |
151 | 0 | dst_column->get_data().push_back( |
152 | 0 | static_cast<Float64>(moments_4 / pow(var_value, 2)) - 3); |
153 | 0 | } |
154 | 0 | break; |
155 | 0 | } |
156 | 0 | default: |
157 | 0 | throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Unknown statistics function kind"); |
158 | 0 | } |
159 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE18insert_result_intoEPKcRNS_7IColumnE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE18insert_result_intoEPKcRNS_7IColumnE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE18insert_result_intoEPKcRNS_7IColumnE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE18insert_result_intoEPKcRNS_7IColumnE |
160 | | |
161 | | private: |
162 | | STATISTICS_FUNCTION_KIND kind; |
163 | | }; |
164 | | |
165 | | } // namespace doris |