be/src/exprs/aggregate/aggregate_function_statistic.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | #include <cmath> |
20 | | #include <cstdint> |
21 | | #include <string> |
22 | | |
23 | | #include "common/exception.h" |
24 | | #include "common/status.h" |
25 | | #include "core/assert_cast.h" |
26 | | #include "core/column/column_nullable.h" |
27 | | #include "core/column/column_vector.h" |
28 | | #include "core/data_type/data_type.h" |
29 | | #include "core/data_type/data_type_nullable.h" |
30 | | #include "core/data_type/data_type_number.h" |
31 | | #include "core/types.h" |
32 | | #include "exprs/aggregate/aggregate_function.h" |
33 | | #include "exprs/aggregate/moments.h" |
34 | | |
35 | | namespace doris { |
36 | | #include "common/compile_check_begin.h" |
37 | | |
/// Selects which higher-order statistic an aggregate function instance computes.
enum class STATISTICS_FUNCTION_KIND : uint8_t { SKEW_POP, KURT_POP };

/// Returns the function name associated with `kind`:
/// "skewness" for SKEW_POP, "kurtosis" for KURT_POP, and "Unknown" for any
/// other (out-of-range) value.
inline std::string to_string(STATISTICS_FUNCTION_KIND kind) {
    if (kind == STATISTICS_FUNCTION_KIND::SKEW_POP) {
        return "skewness";
    }
    if (kind == STATISTICS_FUNCTION_KIND::KURT_POP) {
        return "kurtosis";
    }
    return "Unknown";
}
50 | | |
51 | | template <PrimitiveType T, std::size_t _level> |
52 | | struct StatFuncOneArg { |
53 | | static constexpr PrimitiveType Type = T; |
54 | | using Data = VarMoments<Float64, _level>; |
55 | | using DataType = Float64; |
56 | | }; |
57 | | |
58 | | template <typename StatFunc, bool NullableInput> |
59 | | class AggregateFunctionVarianceSimple |
60 | | : public IAggregateFunctionDataHelper< |
61 | | typename StatFunc::Data, |
62 | | AggregateFunctionVarianceSimple<StatFunc, NullableInput>> { |
63 | | public: |
64 | | using InputCol = ColumnVector<StatFunc::Type>; |
65 | | using ResultCol = ColumnFloat64; |
66 | | using InputType = typename StatFunc::DataType; |
67 | | |
68 | | explicit AggregateFunctionVarianceSimple(STATISTICS_FUNCTION_KIND kind_, |
69 | | const DataTypes& argument_types_) |
70 | 0 | : IAggregateFunctionDataHelper< |
71 | 0 | typename StatFunc::Data, |
72 | 0 | AggregateFunctionVarianceSimple<StatFunc, NullableInput>>(argument_types_), |
73 | 0 | kind(kind_) { |
74 | 0 | DCHECK(!argument_types_.empty()); |
75 | 0 | } Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE |
76 | | |
77 | 0 | String get_name() const override { return to_string(kind); }Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE8get_nameB5cxx11Ev |
78 | | |
79 | 0 | DataTypePtr get_return_type() const override { |
80 | 0 | return make_nullable(std::make_shared<DataTypeFloat64>()); |
81 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE15get_return_typeEv Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE15get_return_typeEv |
82 | | |
83 | | void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, |
84 | 0 | Arena&) const override { |
85 | 0 | if constexpr (NullableInput) { |
86 | 0 | const ColumnNullable& column_with_nullable = |
87 | 0 | assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(*columns[0]); |
88 | |
|
89 | 0 | if (column_with_nullable.is_null_at(row_num)) { |
90 | 0 | return; |
91 | 0 | } else { |
92 | 0 | this->data(place).add( |
93 | 0 | (InputType)assert_cast<const InputCol&, TypeCheckOnRelease::DISABLE>( |
94 | 0 | column_with_nullable.get_nested_column()) |
95 | 0 | .get_data()[row_num]); |
96 | 0 | } |
97 | |
|
98 | 0 | } else { |
99 | 0 | this->data(place).add( |
100 | 0 | (InputType)assert_cast<const InputCol&, TypeCheckOnRelease::DISABLE>( |
101 | 0 | *columns[0]) |
102 | 0 | .get_data()[row_num]); |
103 | 0 | } |
104 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE3addEPcPPKNS_7IColumnElRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE3addEPcPPKNS_7IColumnElRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE3addEPcPPKNS_7IColumnElRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE3addEPcPPKNS_7IColumnElRNS_5ArenaE |
105 | | |
106 | | void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, |
107 | 0 | Arena&) const override { |
108 | 0 | this->data(place).merge(this->data(rhs)); |
109 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE5mergeEPcPKcRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE5mergeEPcPKcRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE5mergeEPcPKcRNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE5mergeEPcPKcRNS_5ArenaE |
110 | | |
111 | 0 | void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { |
112 | 0 | this->data(place).write(buf); |
113 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE9serializeEPKcRNS_14BufferWritableE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE9serializeEPKcRNS_14BufferWritableE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE9serializeEPKcRNS_14BufferWritableE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE9serializeEPKcRNS_14BufferWritableE |
114 | | |
115 | | void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, |
116 | 0 | Arena&) const override { |
117 | 0 | this->data(place).read(buf); |
118 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE |
119 | | |
120 | 0 | void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { |
121 | 0 | const auto& data = this->data(place); |
122 | 0 | ColumnNullable& dst_column_with_nullable = assert_cast<ColumnNullable&>(to); |
123 | 0 | ResultCol* dst_column = |
124 | 0 | assert_cast<ResultCol*>(&(dst_column_with_nullable.get_nested_column())); |
125 | |
|
126 | 0 | switch (kind) { |
127 | 0 | case STATISTICS_FUNCTION_KIND::SKEW_POP: { |
128 | | // If input is empty set, we will get NAN from get_population() |
129 | 0 | Float64 var_value = data.get_population(); |
130 | 0 | Float64 moments_3 = data.get_moment_3(); |
131 | |
|
132 | 0 | if (std::isnan(var_value) || std::isnan(moments_3) || var_value <= 0) { |
133 | 0 | dst_column_with_nullable.get_null_map_data().push_back(1); |
134 | 0 | dst_column->insert_default(); |
135 | 0 | } else { |
136 | 0 | dst_column_with_nullable.get_null_map_data().push_back(0); |
137 | 0 | dst_column->get_data().push_back( |
138 | 0 | static_cast<Float64>(moments_3 / pow(var_value, 1.5))); |
139 | 0 | } |
140 | 0 | break; |
141 | 0 | } |
142 | 0 | case STATISTICS_FUNCTION_KIND::KURT_POP: { |
143 | 0 | Float64 var_value = data.get_population(); |
144 | 0 | Float64 moments_4 = data.get_moment_4(); |
145 | |
|
146 | 0 | if (std::isnan(var_value) || std::isnan(moments_4) || var_value <= 0) { |
147 | 0 | dst_column_with_nullable.get_null_map_data().push_back(1); |
148 | 0 | dst_column->insert_default(); |
149 | 0 | } else { |
150 | 0 | dst_column_with_nullable.get_null_map_data().push_back(0); |
151 | | // kurtosis = E(X^4) / E(X^2)^2 - 3 |
152 | 0 | dst_column->get_data().push_back( |
153 | 0 | static_cast<Float64>(moments_4 / pow(var_value, 2)) - 3); |
154 | 0 | } |
155 | 0 | break; |
156 | 0 | } |
157 | 0 | default: |
158 | 0 | throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Unknown statistics function kind"); |
159 | 0 | } |
160 | 0 | } Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE18insert_result_intoEPKcRNS_7IColumnE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE18insert_result_intoEPKcRNS_7IColumnE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE18insert_result_intoEPKcRNS_7IColumnE Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE18insert_result_intoEPKcRNS_7IColumnE |
161 | | |
162 | | private: |
163 | | STATISTICS_FUNCTION_KIND kind; |
164 | | }; |
165 | | |
166 | | } // namespace doris |
167 | | #include "common/compile_check_end.h" |