be/src/exprs/function/function_width_bucket.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <stddef.h> |
19 | | #include <stdint.h> |
20 | | |
21 | | #include <algorithm> |
22 | | #include <boost/iterator/iterator_facade.hpp> |
23 | | #include <memory> |
24 | | #include <utility> |
25 | | |
26 | | #include "common/status.h" |
27 | | #include "core/assert_cast.h" |
28 | | #include "core/block/block.h" |
29 | | #include "core/block/column_numbers.h" |
30 | | #include "core/block/column_with_type_and_name.h" |
31 | | #include "core/column/column.h" |
32 | | #include "core/column/column_vector.h" |
33 | | #include "core/data_type/data_type.h" |
34 | | #include "core/data_type/data_type_nullable.h" |
35 | | #include "core/data_type/data_type_number.h" |
36 | | #include "core/data_type/primitive_type.h" |
37 | | #include "core/types.h" |
38 | | #include "exprs/aggregate/aggregate_function.h" |
39 | | #include "exprs/function/function.h" |
40 | | #include "exprs/function/simple_function_factory.h" |
41 | | |
42 | | namespace doris { |
43 | | class FunctionContext; |
44 | | } // namespace doris |
45 | | |
46 | | namespace doris { |
47 | | class FunctionWidthBucket : public IFunction { |
48 | | public: |
49 | | static constexpr auto name = "width_bucket"; |
50 | 2 | static FunctionPtr create() { return std::make_shared<FunctionWidthBucket>(); } |
51 | | |
52 | | /// Get function name. |
53 | 1 | String get_name() const override { return name; } |
54 | | |
55 | 1 | bool is_variadic() const override { return false; } |
56 | | |
57 | 0 | size_t get_number_of_arguments() const override { return 4; } |
58 | | |
59 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
60 | 0 | return std::make_shared<DataTypeInt64>(); |
61 | 0 | } |
62 | | |
63 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
64 | 0 | uint32_t result, size_t input_rows_count) const override { |
65 | 0 | ColumnPtr expr_ptr = |
66 | 0 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
67 | 0 | ColumnPtr min_value_ptr = |
68 | 0 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
69 | 0 | ColumnPtr max_value_ptr = |
70 | 0 | block.get_by_position(arguments[2]).column->convert_to_full_column_if_const(); |
71 | 0 | ColumnPtr num_buckets_ptr = block.get_by_position(arguments[3]).column; |
72 | 0 | int64_t num_buckets = num_buckets_ptr->get_int(0); |
73 | |
|
74 | 0 | if (num_buckets <= 0) { |
75 | 0 | return Status::InternalError( |
76 | 0 | "The desired number({}) of buckets must be a positive integer value.", |
77 | 0 | num_buckets); |
78 | 0 | } |
79 | | |
80 | 0 | auto nested_column_ptr = ColumnInt64::create(input_rows_count, 0); |
81 | 0 | DataTypePtr expr_type = block.get_by_position(arguments[0]).type; |
82 | |
|
83 | 0 | if (!_execute_by_type(*expr_ptr, *min_value_ptr, *max_value_ptr, num_buckets, |
84 | 0 | *nested_column_ptr, expr_type)) { |
85 | 0 | return Status::InvalidArgument("Unsupported type for width_bucket: {}", |
86 | 0 | expr_type->get_name()); |
87 | 0 | } |
88 | | |
89 | 0 | block.replace_by_position(result, std::move(nested_column_ptr)); |
90 | 0 | return Status::OK(); |
91 | 0 | } |
92 | | |
93 | | private: |
94 | | template <typename ColumnType> |
95 | | void _execute(const IColumn& expr_column, const IColumn& min_value_column, |
96 | | const IColumn& max_value_column, const int64_t num_buckets, |
97 | 0 | IColumn& nested_column) const { |
98 | 0 | const auto& expr_column_concrete = assert_cast<const ColumnType&>(expr_column); |
99 | 0 | const auto& min_value_column_concrete = assert_cast<const ColumnType&>(min_value_column); |
100 | 0 | const auto& max_value_column_concrete = assert_cast<const ColumnType&>(max_value_column); |
101 | 0 | auto& nested_column_concrete = assert_cast<ColumnInt64&>(nested_column); |
102 | |
|
103 | 0 | size_t input_rows_count = expr_column.size(); |
104 | |
|
105 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
106 | 0 | auto min_value = min_value_column_concrete.get_data()[i]; |
107 | 0 | auto max_value = max_value_column_concrete.get_data()[i]; |
108 | 0 | auto average_value = (max_value - min_value) / (1.0 * num_buckets); |
109 | 0 | if (expr_column_concrete.get_data()[i] < min_value) { |
110 | 0 | continue; |
111 | 0 | } else if (expr_column_concrete.get_data()[i] >= max_value) { |
112 | 0 | nested_column_concrete.get_data()[i] = num_buckets + 1; |
113 | 0 | } else { |
114 | 0 | if ((max_value - min_value) / num_buckets == 0) { |
115 | 0 | continue; |
116 | 0 | } |
117 | 0 | nested_column_concrete.get_data()[i] = |
118 | 0 | (int64_t)(1 + |
119 | 0 | (expr_column_concrete.get_data()[i] - min_value) / average_value); |
120 | 0 | } |
121 | 0 | } |
122 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE3EEEEEvRKNS_7IColumnES7_S7_lRS5_ Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE4EEEEEvRKNS_7IColumnES7_S7_lRS5_ Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE5EEEEEvRKNS_7IColumnES7_S7_lRS5_ Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvRKNS_7IColumnES7_S7_lRS5_ Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE8EEEEEvRKNS_7IColumnES7_S7_lRS5_ Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE9EEEEEvRKNS_7IColumnES7_S7_lRS5_ |
123 | | |
124 | | bool _execute_by_type(const IColumn& expr_column, const IColumn& min_value_column, |
125 | | const IColumn& max_value_column, const int64_t num_buckets, |
126 | 0 | IColumn& nested_column_column, DataTypePtr& expr_type) const { |
127 | 0 | switch (expr_type->get_primitive_type()) { |
128 | 0 | case PrimitiveType::TYPE_TINYINT: |
129 | 0 | _execute<ColumnInt8>(expr_column, min_value_column, max_value_column, num_buckets, |
130 | 0 | nested_column_column); |
131 | 0 | break; |
132 | 0 | case PrimitiveType::TYPE_SMALLINT: |
133 | 0 | _execute<ColumnInt16>(expr_column, min_value_column, max_value_column, num_buckets, |
134 | 0 | nested_column_column); |
135 | 0 | break; |
136 | 0 | case PrimitiveType::TYPE_INT: |
137 | 0 | _execute<ColumnInt32>(expr_column, min_value_column, max_value_column, num_buckets, |
138 | 0 | nested_column_column); |
139 | 0 | break; |
140 | 0 | case PrimitiveType::TYPE_BIGINT: |
141 | 0 | _execute<ColumnInt64>(expr_column, min_value_column, max_value_column, num_buckets, |
142 | 0 | nested_column_column); |
143 | 0 | break; |
144 | 0 | case PrimitiveType::TYPE_FLOAT: |
145 | 0 | _execute<ColumnFloat32>(expr_column, min_value_column, max_value_column, num_buckets, |
146 | 0 | nested_column_column); |
147 | 0 | break; |
148 | 0 | case PrimitiveType::TYPE_DOUBLE: |
149 | 0 | _execute<ColumnFloat64>(expr_column, min_value_column, max_value_column, num_buckets, |
150 | 0 | nested_column_column); |
151 | 0 | break; |
152 | 0 | default: |
153 | 0 | return false; |
154 | 0 | break; |
155 | 0 | } |
156 | 0 | return true; |
157 | 0 | } |
158 | | }; |
159 | | |
160 | 1 | void register_function_width_bucket(SimpleFunctionFactory& factory) { |
161 | 1 | factory.register_function<FunctionWidthBucket>(); |
162 | 1 | } |
163 | | |
164 | | } // namespace doris |