Coverage Report

Created: 2026-03-13 05:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_width_bucket.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <stddef.h>
19
#include <stdint.h>
20
21
#include <algorithm>
22
#include <boost/iterator/iterator_facade.hpp>
23
#include <memory>
24
#include <utility>
25
26
#include "common/status.h"
27
#include "core/assert_cast.h"
28
#include "core/block/block.h"
29
#include "core/block/column_numbers.h"
30
#include "core/block/column_with_type_and_name.h"
31
#include "core/column/column.h"
32
#include "core/column/column_vector.h"
33
#include "core/data_type/data_type.h"
34
#include "core/data_type/data_type_nullable.h"
35
#include "core/data_type/data_type_number.h"
36
#include "core/data_type/primitive_type.h"
37
#include "core/types.h"
38
#include "exprs/aggregate/aggregate_function.h"
39
#include "exprs/function/function.h"
40
#include "exprs/function/simple_function_factory.h"
41
42
namespace doris {
43
class FunctionContext;
44
} // namespace doris
45
46
namespace doris {
47
class FunctionWidthBucket : public IFunction {
48
public:
49
    static constexpr auto name = "width_bucket";
50
64
    static FunctionPtr create() { return std::make_shared<FunctionWidthBucket>(); }
51
52
    /// Get function name.
53
1
    String get_name() const override { return name; }
54
55
56
    bool is_variadic() const override { return false; }
56
57
55
    size_t get_number_of_arguments() const override { return 4; }
58
59
55
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
60
55
        return std::make_shared<DataTypeInt64>();
61
55
    }
62
63
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
64
119
                        uint32_t result, size_t input_rows_count) const override {
65
119
        ColumnPtr expr_ptr =
66
119
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
67
119
        ColumnPtr min_value_ptr =
68
119
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
69
119
        ColumnPtr max_value_ptr =
70
119
                block.get_by_position(arguments[2]).column->convert_to_full_column_if_const();
71
119
        ColumnPtr num_buckets_ptr = block.get_by_position(arguments[3]).column;
72
119
        int64_t num_buckets = num_buckets_ptr->get_int(0);
73
74
119
        if (num_buckets <= 0) {
75
1
            return Status::InternalError(
76
1
                    "The desired number({}) of buckets must be a positive integer value.",
77
1
                    num_buckets);
78
1
        }
79
80
118
        auto nested_column_ptr = ColumnInt64::create(input_rows_count, 0);
81
118
        DataTypePtr expr_type = block.get_by_position(arguments[0]).type;
82
83
118
        if (!_execute_by_type(*expr_ptr, *min_value_ptr, *max_value_ptr, num_buckets,
84
118
                              *nested_column_ptr, expr_type)) {
85
0
            return Status::InvalidArgument("Unsupported type for width_bucket: {}",
86
0
                                           expr_type->get_name());
87
0
        }
88
89
118
        block.replace_by_position(result, std::move(nested_column_ptr));
90
118
        return Status::OK();
91
118
    }
92
93
private:
94
    template <typename ColumnType>
95
    void _execute(const IColumn& expr_column, const IColumn& min_value_column,
96
                  const IColumn& max_value_column, const int64_t num_buckets,
97
120
                  IColumn& nested_column) const {
98
120
        const auto& expr_column_concrete = assert_cast<const ColumnType&>(expr_column);
99
120
        const auto& min_value_column_concrete = assert_cast<const ColumnType&>(min_value_column);
100
120
        const auto& max_value_column_concrete = assert_cast<const ColumnType&>(max_value_column);
101
120
        auto& nested_column_concrete = assert_cast<ColumnInt64&>(nested_column);
102
103
120
        size_t input_rows_count = expr_column.size();
104
105
515
        for (size_t i = 0; i < input_rows_count; ++i) {
106
395
            auto min_value = min_value_column_concrete.get_data()[i];
107
395
            auto max_value = max_value_column_concrete.get_data()[i];
108
395
            auto average_value = (max_value - min_value) / (1.0 * num_buckets);
109
395
            if (expr_column_concrete.get_data()[i] < min_value) {
110
14
                continue;
111
381
            } else if (expr_column_concrete.get_data()[i] >= max_value) {
112
33
                nested_column_concrete.get_data()[i] = num_buckets + 1;
113
348
            } else {
114
348
                if ((max_value - min_value) / num_buckets == 0) {
115
0
                    continue;
116
0
                }
117
348
                nested_column_concrete.get_data()[i] =
118
348
                        (int64_t)(1 +
119
348
                                  (expr_column_concrete.get_data()[i] - min_value) / average_value);
120
348
            }
121
395
        }
122
120
    }
_ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE3EEEEEvRKNS_7IColumnES7_S7_lRS5_
Line
Count
Source
97
7
                  IColumn& nested_column) const {
98
7
        const auto& expr_column_concrete = assert_cast<const ColumnType&>(expr_column);
99
7
        const auto& min_value_column_concrete = assert_cast<const ColumnType&>(min_value_column);
100
7
        const auto& max_value_column_concrete = assert_cast<const ColumnType&>(max_value_column);
101
7
        auto& nested_column_concrete = assert_cast<ColumnInt64&>(nested_column);
102
103
7
        size_t input_rows_count = expr_column.size();
104
105
14
        for (size_t i = 0; i < input_rows_count; ++i) {
106
7
            auto min_value = min_value_column_concrete.get_data()[i];
107
7
            auto max_value = max_value_column_concrete.get_data()[i];
108
7
            auto average_value = (max_value - min_value) / (1.0 * num_buckets);
109
7
            if (expr_column_concrete.get_data()[i] < min_value) {
110
2
                continue;
111
5
            } else if (expr_column_concrete.get_data()[i] >= max_value) {
112
2
                nested_column_concrete.get_data()[i] = num_buckets + 1;
113
3
            } else {
114
3
                if ((max_value - min_value) / num_buckets == 0) {
115
0
                    continue;
116
0
                }
117
3
                nested_column_concrete.get_data()[i] =
118
3
                        (int64_t)(1 +
119
3
                                  (expr_column_concrete.get_data()[i] - min_value) / average_value);
120
3
            }
121
7
        }
122
7
    }
Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE4EEEEEvRKNS_7IColumnES7_S7_lRS5_
Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE5EEEEEvRKNS_7IColumnES7_S7_lRS5_
_ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvRKNS_7IColumnES7_S7_lRS5_
Line
Count
Source
97
8
                  IColumn& nested_column) const {
98
8
        const auto& expr_column_concrete = assert_cast<const ColumnType&>(expr_column);
99
8
        const auto& min_value_column_concrete = assert_cast<const ColumnType&>(min_value_column);
100
8
        const auto& max_value_column_concrete = assert_cast<const ColumnType&>(max_value_column);
101
8
        auto& nested_column_concrete = assert_cast<ColumnInt64&>(nested_column);
102
103
8
        size_t input_rows_count = expr_column.size();
104
105
33
        for (size_t i = 0; i < input_rows_count; ++i) {
106
25
            auto min_value = min_value_column_concrete.get_data()[i];
107
25
            auto max_value = max_value_column_concrete.get_data()[i];
108
25
            auto average_value = (max_value - min_value) / (1.0 * num_buckets);
109
25
            if (expr_column_concrete.get_data()[i] < min_value) {
110
0
                continue;
111
25
            } else if (expr_column_concrete.get_data()[i] >= max_value) {
112
0
                nested_column_concrete.get_data()[i] = num_buckets + 1;
113
25
            } else {
114
25
                if ((max_value - min_value) / num_buckets == 0) {
115
0
                    continue;
116
0
                }
117
25
                nested_column_concrete.get_data()[i] =
118
25
                        (int64_t)(1 +
119
25
                                  (expr_column_concrete.get_data()[i] - min_value) / average_value);
120
25
            }
121
25
        }
122
8
    }
Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE8EEEEEvRKNS_7IColumnES7_S7_lRS5_
_ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE9EEEEEvRKNS_7IColumnES7_S7_lRS5_
Line
Count
Source
97
105
                  IColumn& nested_column) const {
98
105
        const auto& expr_column_concrete = assert_cast<const ColumnType&>(expr_column);
99
105
        const auto& min_value_column_concrete = assert_cast<const ColumnType&>(min_value_column);
100
105
        const auto& max_value_column_concrete = assert_cast<const ColumnType&>(max_value_column);
101
105
        auto& nested_column_concrete = assert_cast<ColumnInt64&>(nested_column);
102
103
105
        size_t input_rows_count = expr_column.size();
104
105
468
        for (size_t i = 0; i < input_rows_count; ++i) {
106
363
            auto min_value = min_value_column_concrete.get_data()[i];
107
363
            auto max_value = max_value_column_concrete.get_data()[i];
108
363
            auto average_value = (max_value - min_value) / (1.0 * num_buckets);
109
363
            if (expr_column_concrete.get_data()[i] < min_value) {
110
12
                continue;
111
351
            } else if (expr_column_concrete.get_data()[i] >= max_value) {
112
31
                nested_column_concrete.get_data()[i] = num_buckets + 1;
113
320
            } else {
114
320
                if ((max_value - min_value) / num_buckets == 0) {
115
0
                    continue;
116
0
                }
117
320
                nested_column_concrete.get_data()[i] =
118
320
                        (int64_t)(1 +
119
320
                                  (expr_column_concrete.get_data()[i] - min_value) / average_value);
120
320
            }
121
363
        }
122
105
    }
123
124
    bool _execute_by_type(const IColumn& expr_column, const IColumn& min_value_column,
125
                          const IColumn& max_value_column, const int64_t num_buckets,
126
120
                          IColumn& nested_column_column, DataTypePtr& expr_type) const {
127
120
        switch (expr_type->get_primitive_type()) {
128
7
        case PrimitiveType::TYPE_TINYINT:
129
7
            _execute<ColumnInt8>(expr_column, min_value_column, max_value_column, num_buckets,
130
7
                                 nested_column_column);
131
7
            break;
132
0
        case PrimitiveType::TYPE_SMALLINT:
133
0
            _execute<ColumnInt16>(expr_column, min_value_column, max_value_column, num_buckets,
134
0
                                  nested_column_column);
135
0
            break;
136
0
        case PrimitiveType::TYPE_INT:
137
0
            _execute<ColumnInt32>(expr_column, min_value_column, max_value_column, num_buckets,
138
0
                                  nested_column_column);
139
0
            break;
140
8
        case PrimitiveType::TYPE_BIGINT:
141
8
            _execute<ColumnInt64>(expr_column, min_value_column, max_value_column, num_buckets,
142
8
                                  nested_column_column);
143
8
            break;
144
0
        case PrimitiveType::TYPE_FLOAT:
145
0
            _execute<ColumnFloat32>(expr_column, min_value_column, max_value_column, num_buckets,
146
0
                                    nested_column_column);
147
0
            break;
148
105
        case PrimitiveType::TYPE_DOUBLE:
149
105
            _execute<ColumnFloat64>(expr_column, min_value_column, max_value_column, num_buckets,
150
105
                                    nested_column_column);
151
105
            break;
152
0
        default:
153
0
            return false;
154
0
            break;
155
120
        }
156
120
        return true;
157
120
    }
158
};
159
160
8
void register_function_width_bucket(SimpleFunctionFactory& factory) {
161
8
    factory.register_function<FunctionWidthBucket>();
162
8
}
163
164
} // namespace doris