be/src/exprs/function/function_width_bucket.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <stddef.h> |
19 | | #include <stdint.h> |
20 | | |
21 | | #include <algorithm> |
22 | | #include <boost/iterator/iterator_facade.hpp> |
23 | | #include <memory> |
24 | | #include <utility> |
25 | | |
26 | | #include "common/status.h" |
27 | | #include "core/assert_cast.h" |
28 | | #include "core/block/block.h" |
29 | | #include "core/block/column_numbers.h" |
30 | | #include "core/block/column_with_type_and_name.h" |
31 | | #include "core/column/column.h" |
32 | | #include "core/column/column_vector.h" |
33 | | #include "core/data_type/data_type.h" |
34 | | #include "core/data_type/data_type_nullable.h" |
35 | | #include "core/data_type/data_type_number.h" |
36 | | #include "core/data_type/primitive_type.h" |
37 | | #include "core/types.h" |
38 | | #include "exprs/aggregate/aggregate_function.h" |
39 | | #include "exprs/function/function.h" |
40 | | #include "exprs/function/simple_function_factory.h" |
41 | | |
42 | | namespace doris { |
43 | | class FunctionContext; |
44 | | } // namespace doris |
45 | | |
46 | | namespace doris { |
47 | | class FunctionWidthBucket : public IFunction { |
48 | | public: |
49 | | static constexpr auto name = "width_bucket"; |
50 | 64 | static FunctionPtr create() { return std::make_shared<FunctionWidthBucket>(); } |
51 | | |
52 | | /// Get function name. |
53 | 1 | String get_name() const override { return name; } |
54 | | |
55 | 56 | bool is_variadic() const override { return false; } |
56 | | |
57 | 55 | size_t get_number_of_arguments() const override { return 4; } |
58 | | |
59 | 55 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
60 | 55 | return std::make_shared<DataTypeInt64>(); |
61 | 55 | } |
62 | | |
63 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
64 | 119 | uint32_t result, size_t input_rows_count) const override { |
65 | 119 | ColumnPtr expr_ptr = |
66 | 119 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
67 | 119 | ColumnPtr min_value_ptr = |
68 | 119 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
69 | 119 | ColumnPtr max_value_ptr = |
70 | 119 | block.get_by_position(arguments[2]).column->convert_to_full_column_if_const(); |
71 | 119 | ColumnPtr num_buckets_ptr = block.get_by_position(arguments[3]).column; |
72 | 119 | int64_t num_buckets = num_buckets_ptr->get_int(0); |
73 | | |
74 | 119 | if (num_buckets <= 0) { |
75 | 1 | return Status::InternalError( |
76 | 1 | "The desired number({}) of buckets must be a positive integer value.", |
77 | 1 | num_buckets); |
78 | 1 | } |
79 | | |
80 | 118 | auto nested_column_ptr = ColumnInt64::create(input_rows_count, 0); |
81 | 118 | DataTypePtr expr_type = block.get_by_position(arguments[0]).type; |
82 | | |
83 | 118 | if (!_execute_by_type(*expr_ptr, *min_value_ptr, *max_value_ptr, num_buckets, |
84 | 118 | *nested_column_ptr, expr_type)) { |
85 | 0 | return Status::InvalidArgument("Unsupported type for width_bucket: {}", |
86 | 0 | expr_type->get_name()); |
87 | 0 | } |
88 | | |
89 | 118 | block.replace_by_position(result, std::move(nested_column_ptr)); |
90 | 118 | return Status::OK(); |
91 | 118 | } |
92 | | |
93 | | private: |
94 | | template <typename ColumnType> |
95 | | void _execute(const IColumn& expr_column, const IColumn& min_value_column, |
96 | | const IColumn& max_value_column, const int64_t num_buckets, |
97 | 120 | IColumn& nested_column) const { |
98 | 120 | const auto& expr_column_concrete = assert_cast<const ColumnType&>(expr_column); |
99 | 120 | const auto& min_value_column_concrete = assert_cast<const ColumnType&>(min_value_column); |
100 | 120 | const auto& max_value_column_concrete = assert_cast<const ColumnType&>(max_value_column); |
101 | 120 | auto& nested_column_concrete = assert_cast<ColumnInt64&>(nested_column); |
102 | | |
103 | 120 | size_t input_rows_count = expr_column.size(); |
104 | | |
105 | 515 | for (size_t i = 0; i < input_rows_count; ++i) { |
106 | 395 | auto min_value = min_value_column_concrete.get_data()[i]; |
107 | 395 | auto max_value = max_value_column_concrete.get_data()[i]; |
108 | 395 | auto average_value = (max_value - min_value) / (1.0 * num_buckets); |
109 | 395 | if (expr_column_concrete.get_data()[i] < min_value) { |
110 | 14 | continue; |
111 | 381 | } else if (expr_column_concrete.get_data()[i] >= max_value) { |
112 | 33 | nested_column_concrete.get_data()[i] = num_buckets + 1; |
113 | 348 | } else { |
114 | 348 | if ((max_value - min_value) / num_buckets == 0) { |
115 | 0 | continue; |
116 | 0 | } |
117 | 348 | nested_column_concrete.get_data()[i] = |
118 | 348 | (int64_t)(1 + |
119 | 348 | (expr_column_concrete.get_data()[i] - min_value) / average_value); |
120 | 348 | } |
121 | 395 | } |
122 | 120 | } _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE3EEEEEvRKNS_7IColumnES7_S7_lRS5_ Line | Count | Source | 97 | 7 | IColumn& nested_column) const { | 98 | 7 | const auto& expr_column_concrete = assert_cast<const ColumnType&>(expr_column); | 99 | 7 | const auto& min_value_column_concrete = assert_cast<const ColumnType&>(min_value_column); | 100 | 7 | const auto& max_value_column_concrete = assert_cast<const ColumnType&>(max_value_column); | 101 | 7 | auto& nested_column_concrete = assert_cast<ColumnInt64&>(nested_column); | 102 | | | 103 | 7 | size_t input_rows_count = expr_column.size(); | 104 | | | 105 | 14 | for (size_t i = 0; i < input_rows_count; ++i) { | 106 | 7 | auto min_value = min_value_column_concrete.get_data()[i]; | 107 | 7 | auto max_value = max_value_column_concrete.get_data()[i]; | 108 | 7 | auto average_value = (max_value - min_value) / (1.0 * num_buckets); | 109 | 7 | if (expr_column_concrete.get_data()[i] < min_value) { | 110 | 2 | continue; | 111 | 5 | } else if (expr_column_concrete.get_data()[i] >= max_value) { | 112 | 2 | nested_column_concrete.get_data()[i] = num_buckets + 1; | 113 | 3 | } else { | 114 | 3 | if ((max_value - min_value) / num_buckets == 0) { | 115 | 0 | continue; | 116 | 0 | } | 117 | 3 | nested_column_concrete.get_data()[i] = | 118 | 3 | (int64_t)(1 + | 119 | 3 | (expr_column_concrete.get_data()[i] - min_value) / average_value); | 120 | 3 | } | 121 | 7 | } | 122 | 7 | } |
Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE4EEEEEvRKNS_7IColumnES7_S7_lRS5_ Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE5EEEEEvRKNS_7IColumnES7_S7_lRS5_ _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE6EEEEEvRKNS_7IColumnES7_S7_lRS5_ Line | Count | Source | 97 | 8 | IColumn& nested_column) const { | 98 | 8 | const auto& expr_column_concrete = assert_cast<const ColumnType&>(expr_column); | 99 | 8 | const auto& min_value_column_concrete = assert_cast<const ColumnType&>(min_value_column); | 100 | 8 | const auto& max_value_column_concrete = assert_cast<const ColumnType&>(max_value_column); | 101 | 8 | auto& nested_column_concrete = assert_cast<ColumnInt64&>(nested_column); | 102 | | | 103 | 8 | size_t input_rows_count = expr_column.size(); | 104 | | | 105 | 33 | for (size_t i = 0; i < input_rows_count; ++i) { | 106 | 25 | auto min_value = min_value_column_concrete.get_data()[i]; | 107 | 25 | auto max_value = max_value_column_concrete.get_data()[i]; | 108 | 25 | auto average_value = (max_value - min_value) / (1.0 * num_buckets); | 109 | 25 | if (expr_column_concrete.get_data()[i] < min_value) { | 110 | 0 | continue; | 111 | 25 | } else if (expr_column_concrete.get_data()[i] >= max_value) { | 112 | 0 | nested_column_concrete.get_data()[i] = num_buckets + 1; | 113 | 25 | } else { | 114 | 25 | if ((max_value - min_value) / num_buckets == 0) { | 115 | 0 | continue; | 116 | 0 | } | 117 | 25 | nested_column_concrete.get_data()[i] = | 118 | 25 | (int64_t)(1 + | 119 | 25 | (expr_column_concrete.get_data()[i] - min_value) / average_value); | 120 | 25 | } | 121 | 25 | } | 122 | 8 | } |
Unexecuted instantiation: _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE8EEEEEvRKNS_7IColumnES7_S7_lRS5_ _ZNK5doris19FunctionWidthBucket8_executeINS_12ColumnVectorILNS_13PrimitiveTypeE9EEEEEvRKNS_7IColumnES7_S7_lRS5_ Line | Count | Source | 97 | 105 | IColumn& nested_column) const { | 98 | 105 | const auto& expr_column_concrete = assert_cast<const ColumnType&>(expr_column); | 99 | 105 | const auto& min_value_column_concrete = assert_cast<const ColumnType&>(min_value_column); | 100 | 105 | const auto& max_value_column_concrete = assert_cast<const ColumnType&>(max_value_column); | 101 | 105 | auto& nested_column_concrete = assert_cast<ColumnInt64&>(nested_column); | 102 | | | 103 | 105 | size_t input_rows_count = expr_column.size(); | 104 | | | 105 | 468 | for (size_t i = 0; i < input_rows_count; ++i) { | 106 | 363 | auto min_value = min_value_column_concrete.get_data()[i]; | 107 | 363 | auto max_value = max_value_column_concrete.get_data()[i]; | 108 | 363 | auto average_value = (max_value - min_value) / (1.0 * num_buckets); | 109 | 363 | if (expr_column_concrete.get_data()[i] < min_value) { | 110 | 12 | continue; | 111 | 351 | } else if (expr_column_concrete.get_data()[i] >= max_value) { | 112 | 31 | nested_column_concrete.get_data()[i] = num_buckets + 1; | 113 | 320 | } else { | 114 | 320 | if ((max_value - min_value) / num_buckets == 0) { | 115 | 0 | continue; | 116 | 0 | } | 117 | 320 | nested_column_concrete.get_data()[i] = | 118 | 320 | (int64_t)(1 + | 119 | 320 | (expr_column_concrete.get_data()[i] - min_value) / average_value); | 120 | 320 | } | 121 | 363 | } | 122 | 105 | } |
|
123 | | |
124 | | bool _execute_by_type(const IColumn& expr_column, const IColumn& min_value_column, |
125 | | const IColumn& max_value_column, const int64_t num_buckets, |
126 | 120 | IColumn& nested_column_column, DataTypePtr& expr_type) const { |
127 | 120 | switch (expr_type->get_primitive_type()) { |
128 | 7 | case PrimitiveType::TYPE_TINYINT: |
129 | 7 | _execute<ColumnInt8>(expr_column, min_value_column, max_value_column, num_buckets, |
130 | 7 | nested_column_column); |
131 | 7 | break; |
132 | 0 | case PrimitiveType::TYPE_SMALLINT: |
133 | 0 | _execute<ColumnInt16>(expr_column, min_value_column, max_value_column, num_buckets, |
134 | 0 | nested_column_column); |
135 | 0 | break; |
136 | 0 | case PrimitiveType::TYPE_INT: |
137 | 0 | _execute<ColumnInt32>(expr_column, min_value_column, max_value_column, num_buckets, |
138 | 0 | nested_column_column); |
139 | 0 | break; |
140 | 8 | case PrimitiveType::TYPE_BIGINT: |
141 | 8 | _execute<ColumnInt64>(expr_column, min_value_column, max_value_column, num_buckets, |
142 | 8 | nested_column_column); |
143 | 8 | break; |
144 | 0 | case PrimitiveType::TYPE_FLOAT: |
145 | 0 | _execute<ColumnFloat32>(expr_column, min_value_column, max_value_column, num_buckets, |
146 | 0 | nested_column_column); |
147 | 0 | break; |
148 | 105 | case PrimitiveType::TYPE_DOUBLE: |
149 | 105 | _execute<ColumnFloat64>(expr_column, min_value_column, max_value_column, num_buckets, |
150 | 105 | nested_column_column); |
151 | 105 | break; |
152 | 0 | default: |
153 | 0 | return false; |
154 | 0 | break; |
155 | 120 | } |
156 | 120 | return true; |
157 | 120 | } |
158 | | }; |
159 | | |
160 | 8 | void register_function_width_bucket(SimpleFunctionFactory& factory) { |
161 | 8 | factory.register_function<FunctionWidthBucket>(); |
162 | 8 | } |
163 | | |
164 | | } // namespace doris |