Coverage Report

Created: 2026-04-10 04:05

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/aggregate/aggregate_function_statistic.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
#include <cmath>
20
#include <cstdint>
21
#include <string>
22
23
#include "common/exception.h"
24
#include "common/status.h"
25
#include "core/assert_cast.h"
26
#include "core/column/column_nullable.h"
27
#include "core/column/column_vector.h"
28
#include "core/data_type/data_type.h"
29
#include "core/data_type/data_type_nullable.h"
30
#include "core/data_type/data_type_number.h"
31
#include "core/types.h"
32
#include "exprs/aggregate/aggregate_function.h"
33
#include "exprs/aggregate/moments.h"
34
35
namespace doris {
36
37
// Selects which statistic AggregateFunctionVarianceSimple emits in insert_result_into:
//   SKEW_POP -> moment_3 / var^1.5
//   KURT_POP -> moment_4 / var^2 - 3
// where var is the value returned by the aggregate state's get_population().
enum class STATISTICS_FUNCTION_KIND : uint8_t { SKEW_POP, KURT_POP };
38
39
0
// Returns the display name for a statistics kind: "skewness" for SKEW_POP,
// "kurtosis" for KURT_POP, and "Unknown" for any other value.
// Used by AggregateFunctionVarianceSimple::get_name().
inline std::string to_string(STATISTICS_FUNCTION_KIND kind) {
40
0
    switch (kind) {
41
0
    case STATISTICS_FUNCTION_KIND::SKEW_POP:
42
0
        return "skewness";
43
0
    case STATISTICS_FUNCTION_KIND::KURT_POP:
44
0
        return "kurtosis";
45
0
    default:
46
0
        return "Unknown";
47
0
    }
48
0
}
49
50
// Traits bundle consumed by AggregateFunctionVarianceSimple as its StatFunc parameter.
//   T      - primitive type of the input column (used as ColumnVector<Type>).
//   _level - forwarded to VarMoments as its second template argument.
//            NOTE(review): presumably the highest moment order tracked (3 for skewness,
//            4 for kurtosis) - confirm against exprs/aggregate/moments.h.
template <PrimitiveType T, std::size_t _level>
51
struct StatFuncOneArg {
52
    // Primitive type of the input column elements.
    static constexpr PrimitiveType Type = T;
53
    // Aggregate state type; the aggregate function calls add/merge/write/read and the
    // get_population/get_moment_N accessors on it.
    using Data = VarMoments<Float64, _level>;
54
    // Type each input value is converted to before being added to the state.
    using DataType = Float64;
55
};
56
57
// Aggregate function that produces either skewness or kurtosis, selected at construction
// through STATISTICS_FUNCTION_KIND, from the moments kept in StatFunc::Data.
//
// Template parameters:
//   StatFunc      - traits type providing Type (input column element type), Data (aggregate
//                   state) and DataType (type input values are converted to).
//   NullableInput - when true, add() treats columns[0] as a ColumnNullable and skips null rows;
//                   when false, columns[0] is read directly as ColumnVector<StatFunc::Type>.
//
// The result type is always Nullable(Float64); insert_result_into documents when NULL is emitted.
template <typename StatFunc, bool NullableInput>
58
class AggregateFunctionVarianceSimple
59
        : public IAggregateFunctionDataHelper<
60
                  typename StatFunc::Data,
61
                  AggregateFunctionVarianceSimple<StatFunc, NullableInput>> {
62
public:
63
    using InputCol = ColumnVector<StatFunc::Type>;
64
    using ResultCol = ColumnFloat64;
65
    using InputType = typename StatFunc::DataType;
66
67
    // Records which statistic to compute and forwards argument_types_ to the base helper.
    // DCHECKs that at least one argument type was supplied.
    explicit AggregateFunctionVarianceSimple(STATISTICS_FUNCTION_KIND kind_,
68
                                             const DataTypes& argument_types_)
69
0
            : IAggregateFunctionDataHelper<
70
0
                      typename StatFunc::Data,
71
0
                      AggregateFunctionVarianceSimple<StatFunc, NullableInput>>(argument_types_),
72
0
              kind(kind_) {
73
0
        DCHECK(!argument_types_.empty());
74
0
    }
Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE
Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE
Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE
Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE
75
76
0
    // Returns the name for the configured kind as produced by to_string():
    // "skewness" or "kurtosis" (or "Unknown" for an unrecognised kind).
    String get_name() const override { return to_string(kind); }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE8get_nameB5cxx11Ev
77
78
0
    // The result is Nullable(Float64) regardless of NullableInput, because
    // insert_result_into may emit NULL.
    DataTypePtr get_return_type() const override {
79
0
        return make_nullable(std::make_shared<DataTypeFloat64>());
80
0
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE15get_return_typeEv
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE15get_return_typeEv
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE15get_return_typeEv
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE15get_return_typeEv
81
82
    // Adds the value at row_num of columns[0] to the aggregate state at `place`, converting it
    // to InputType first. With NullableInput, a null row is skipped and a non-null value is
    // read from the nullable column's nested column. The Arena argument is unused.
    // NOTE(review): TypeCheckOnRelease::DISABLE presumably skips assert_cast's type check in
    // release builds - confirm in core/assert_cast.h.
    void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num,
83
0
             Arena&) const override {
84
0
        if constexpr (NullableInput) {
85
0
            const ColumnNullable& column_with_nullable =
86
0
                    assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(*columns[0]);
87
88
0
            if (column_with_nullable.is_null_at(row_num)) {
89
0
                return;
90
0
            } else {
91
0
                this->data(place).add(
92
0
                        (InputType)assert_cast<const InputCol&, TypeCheckOnRelease::DISABLE>(
93
0
                                column_with_nullable.get_nested_column())
94
0
                                .get_data()[row_num]);
95
0
            }
96
97
0
        } else {
98
0
            this->data(place).add(
99
0
                    (InputType)assert_cast<const InputCol&, TypeCheckOnRelease::DISABLE>(
100
0
                            *columns[0])
101
0
                            .get_data()[row_num]);
102
0
        }
103
0
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE3addEPcPPKNS_7IColumnElRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE3addEPcPPKNS_7IColumnElRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE3addEPcPPKNS_7IColumnElRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE3addEPcPPKNS_7IColumnElRNS_5ArenaE
104
105
    // Merges the aggregate state at rhs into the state at place by calling the state's merge().
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
106
0
               Arena&) const override {
107
0
        this->data(place).merge(this->data(rhs));
108
0
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE5mergeEPcPKcRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE5mergeEPcPKcRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE5mergeEPcPKcRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE5mergeEPcPKcRNS_5ArenaE
109
110
0
    // Writes the aggregate state at place into buf by calling the state's write().
    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
111
0
        this->data(place).write(buf);
112
0
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE9serializeEPKcRNS_14BufferWritableE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE9serializeEPKcRNS_14BufferWritableE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE9serializeEPKcRNS_14BufferWritableE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE9serializeEPKcRNS_14BufferWritableE
113
114
    // Fills the aggregate state at place from buf by calling the state's read().
    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
115
0
                     Arena&) const override {
116
0
        this->data(place).read(buf);
117
0
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE
118
119
0
    // Appends exactly one result row to `to`, which is cast to a ColumnNullable whose nested
    // column is a ColumnFloat64.
    //   SKEW_POP: moment_3 / var^1.5
    //   KURT_POP: moment_4 / var^2 - 3
    // where var is data.get_population(). A NULL row (null-map entry 1 plus a default nested
    // value) is appended instead when var or the moment is NaN, or when var <= 0, which also
    // avoids dividing by zero for constant input.
    // Throws doris::Exception(ErrorCode::INTERNAL_ERROR) if kind is neither of the two values.
    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
120
0
        const auto& data = this->data(place);
121
0
        ColumnNullable& dst_column_with_nullable = assert_cast<ColumnNullable&>(to);
122
0
        ResultCol* dst_column =
123
0
                assert_cast<ResultCol*>(&(dst_column_with_nullable.get_nested_column()));
124
125
0
        switch (kind) {
126
0
        case STATISTICS_FUNCTION_KIND::SKEW_POP: {
127
            // If input is empty set, we will get NAN from get_population()
128
0
            Float64 var_value = data.get_population();
129
0
            Float64 moments_3 = data.get_moment_3();
130
131
0
            if (std::isnan(var_value) || std::isnan(moments_3) || var_value <= 0) {
132
0
                // Null-map and nested column must grow together: one flag, one placeholder value.
                dst_column_with_nullable.get_null_map_data().push_back(1);
133
0
                dst_column->insert_default();
134
0
            } else {
135
0
                dst_column_with_nullable.get_null_map_data().push_back(0);
136
0
                dst_column->get_data().push_back(
137
0
                        static_cast<Float64>(moments_3 / pow(var_value, 1.5)));
138
0
            }
139
0
            break;
140
0
        }
141
0
        case STATISTICS_FUNCTION_KIND::KURT_POP: {
142
0
            Float64 var_value = data.get_population();
143
0
            Float64 moments_4 = data.get_moment_4();
144
145
0
            if (std::isnan(var_value) || std::isnan(moments_4) || var_value <= 0) {
146
0
                dst_column_with_nullable.get_null_map_data().push_back(1);
147
0
                dst_column->insert_default();
148
0
            } else {
149
0
                dst_column_with_nullable.get_null_map_data().push_back(0);
150
                // kurtosis = E(X^4) / E(X^2)^2 - 3
151
0
                dst_column->get_data().push_back(
152
0
                        static_cast<Float64>(moments_4 / pow(var_value, 2)) - 3);
153
0
            }
154
0
            break;
155
0
        }
156
0
        default:
157
0
            throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Unknown statistics function kind");
158
0
        }
159
0
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE18insert_result_intoEPKcRNS_7IColumnE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE18insert_result_intoEPKcRNS_7IColumnE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE18insert_result_intoEPKcRNS_7IColumnE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE18insert_result_intoEPKcRNS_7IColumnE
160
161
private:
162
    // Statistic selected at construction; read by get_name() and insert_result_into().
    STATISTICS_FUNCTION_KIND kind;
163
};
164
165
} // namespace doris