Coverage Report

Created: 2026-03-12 14:02

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/aggregate/aggregate_function_statistic.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
#include <cmath>
20
#include <cstdint>
21
#include <string>
22
23
#include "common/exception.h"
24
#include "common/status.h"
25
#include "core/assert_cast.h"
26
#include "core/column/column_nullable.h"
27
#include "core/column/column_vector.h"
28
#include "core/data_type/data_type.h"
29
#include "core/data_type/data_type_nullable.h"
30
#include "core/data_type/data_type_number.h"
31
#include "core/types.h"
32
#include "exprs/aggregate/aggregate_function.h"
33
#include "exprs/aggregate/moments.h"
34
35
namespace doris {
36
#include "common/compile_check_begin.h"
37
38
/// Selects which statistic AggregateFunctionVarianceSimple reports.
///  - SKEW_POP: named "skewness" by to_string(); computed as moment_3 / variance^1.5.
///  - KURT_POP: named "kurtosis" by to_string(); computed as moment_4 / variance^2 - 3.
enum class STATISTICS_FUNCTION_KIND : uint8_t { SKEW_POP, KURT_POP };
39
40
0
/// Maps a statistics kind to the function name reported for it.
///
/// @param kind  The statistic selector.
/// @return "skewness" for SKEW_POP, "kurtosis" for KURT_POP, and "Unknown" for
///         any other (out-of-range) enumerator value.
inline std::string to_string(STATISTICS_FUNCTION_KIND kind) {
    if (kind == STATISTICS_FUNCTION_KIND::SKEW_POP) {
        return "skewness";
    }
    if (kind == STATISTICS_FUNCTION_KIND::KURT_POP) {
        return "kurtosis";
    }
    // Only reachable when the enum holds a value outside its declared enumerators.
    return "Unknown";
}
50
51
template <PrimitiveType T, std::size_t _level>
52
struct StatFuncOneArg {
53
    static constexpr PrimitiveType Type = T;
54
    using Data = VarMoments<Float64, _level>;
55
    using DataType = Float64;
56
};
57
58
template <typename StatFunc, bool NullableInput>
59
class AggregateFunctionVarianceSimple
60
        : public IAggregateFunctionDataHelper<
61
                  typename StatFunc::Data,
62
                  AggregateFunctionVarianceSimple<StatFunc, NullableInput>> {
63
public:
64
    using InputCol = ColumnVector<StatFunc::Type>;
65
    using ResultCol = ColumnFloat64;
66
    using InputType = typename StatFunc::DataType;
67
68
    explicit AggregateFunctionVarianceSimple(STATISTICS_FUNCTION_KIND kind_,
69
                                             const DataTypes& argument_types_)
70
0
            : IAggregateFunctionDataHelper<
71
0
                      typename StatFunc::Data,
72
0
                      AggregateFunctionVarianceSimple<StatFunc, NullableInput>>(argument_types_),
73
0
              kind(kind_) {
74
0
        DCHECK(!argument_types_.empty());
75
0
    }
Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE
Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE
Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE
Unexecuted instantiation: _ZN5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EEC2ENS_24STATISTICS_FUNCTION_KINDERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaISA_EE
76
77
0
    /// Returns the function name for the stored kind, as produced by to_string().
    String get_name() const override {
        String name = to_string(kind);
        return name;
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE8get_nameB5cxx11Ev
78
79
0
    /// Result type of the aggregate: a nullable wrapper around Float64.
    /// The result is nullable because insert_result_into() may emit NULL.
    DataTypePtr get_return_type() const override {
        DataTypePtr float64_type = std::make_shared<DataTypeFloat64>();
        return make_nullable(float64_type);
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE15get_return_typeEv
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE15get_return_typeEv
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE15get_return_typeEv
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE15get_return_typeEv
82
83
    void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num,
84
0
             Arena&) const override {
85
0
        if constexpr (NullableInput) {
86
0
            const ColumnNullable& column_with_nullable =
87
0
                    assert_cast<const ColumnNullable&, TypeCheckOnRelease::DISABLE>(*columns[0]);
88
89
0
            if (column_with_nullable.is_null_at(row_num)) {
90
0
                return;
91
0
            } else {
92
0
                this->data(place).add(
93
0
                        (InputType)assert_cast<const InputCol&, TypeCheckOnRelease::DISABLE>(
94
0
                                column_with_nullable.get_nested_column())
95
0
                                .get_data()[row_num]);
96
0
            }
97
98
0
        } else {
99
0
            this->data(place).add(
100
0
                    (InputType)assert_cast<const InputCol&, TypeCheckOnRelease::DISABLE>(
101
0
                            *columns[0])
102
0
                            .get_data()[row_num]);
103
0
        }
104
0
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE3addEPcPPKNS_7IColumnElRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE3addEPcPPKNS_7IColumnElRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE3addEPcPPKNS_7IColumnElRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE3addEPcPPKNS_7IColumnElRNS_5ArenaE
105
106
    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
107
0
               Arena&) const override {
108
0
        this->data(place).merge(this->data(rhs));
109
0
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE5mergeEPcPKcRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE5mergeEPcPKcRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE5mergeEPcPKcRNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE5mergeEPcPKcRNS_5ArenaE
110
111
0
    /// Writes the aggregation state at `place` into `buf` via the state's write().
    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
        const auto& state = this->data(place);
        state.write(buf);
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE9serializeEPKcRNS_14BufferWritableE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE9serializeEPKcRNS_14BufferWritableE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE9serializeEPKcRNS_14BufferWritableE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE9serializeEPKcRNS_14BufferWritableE
114
115
    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
116
0
                     Arena&) const override {
117
0
        this->data(place).read(buf);
118
0
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE11deserializeEPcRNS_14BufferReadableERNS_5ArenaE
119
120
0
    /// Appends the final statistic for the state at `place` as one row of `to`.
    ///
    /// @param place  Aggregation state to read.
    /// @param to     Destination column; must be a ColumnNullable whose nested column
    ///               is ResultCol (ColumnFloat64).
    ///
    /// Emits NULL (with a default nested value) when the population variance or the
    /// relevant moment is NaN, or when the variance is not strictly positive.
    /// Otherwise emits:
    ///  - SKEW_POP: moment_3 / variance^1.5
    ///  - KURT_POP: moment_4 / variance^2 - 3
    ///
    /// @throws doris::Exception (INTERNAL_ERROR) if `kind` is not a known enumerator.
    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
        const auto& state = this->data(place);
        auto& nullable_dst = assert_cast<ColumnNullable&>(to);
        auto& nested_dst = assert_cast<ResultCol&>(nullable_dst.get_nested_column());
        auto& null_map = nullable_dst.get_null_map_data();

        // The statistic is undefined without a valid, strictly positive variance.
        const auto is_undefined = [](Float64 variance, Float64 moment) {
            return std::isnan(variance) || std::isnan(moment) || variance <= 0;
        };
        const auto insert_null = [&] {
            null_map.push_back(1);
            nested_dst.insert_default();
        };
        const auto insert_value = [&](Float64 value) {
            null_map.push_back(0);
            nested_dst.get_data().push_back(value);
        };

        switch (kind) {
        case STATISTICS_FUNCTION_KIND::SKEW_POP: {
            // If input is empty set, we will get NAN from get_population()
            const Float64 variance = state.get_population();
            const Float64 moment_3 = state.get_moment_3();

            if (is_undefined(variance, moment_3)) {
                insert_null();
            } else {
                insert_value(moment_3 / std::pow(variance, 1.5));
            }
            break;
        }
        case STATISTICS_FUNCTION_KIND::KURT_POP: {
            const Float64 variance = state.get_population();
            const Float64 moment_4 = state.get_moment_4();

            if (is_undefined(variance, moment_4)) {
                insert_null();
            } else {
                // kurtosis = E(X^4) / E(X^2)^2 - 3
                // Squaring by multiplication avoids a general pow() call.
                insert_value(moment_4 / (variance * variance) - 3.0);
            }
            break;
        }
        default:
            throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Unknown statistics function kind");
        }
    }
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb1EE18insert_result_intoEPKcRNS_7IColumnE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm3EEELb0EE18insert_result_intoEPKcRNS_7IColumnE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb1EE18insert_result_intoEPKcRNS_7IColumnE
Unexecuted instantiation: _ZNK5doris31AggregateFunctionVarianceSimpleINS_14StatFuncOneArgILNS_13PrimitiveTypeE9ELm4EEELb0EE18insert_result_intoEPKcRNS_7IColumnE
161
162
private:
163
    STATISTICS_FUNCTION_KIND kind;
164
};
165
166
} // namespace doris
167
#include "common/compile_check_end.h"