be/src/exprs/function/array/function_array_difference.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <fmt/format.h> |
21 | | #include <glog/logging.h> |
22 | | #include <stddef.h> |
23 | | |
24 | | #include <algorithm> |
25 | | #include <boost/iterator/iterator_facade.hpp> |
26 | | #include <memory> |
27 | | #include <ostream> |
28 | | #include <string> |
29 | | #include <utility> |
30 | | |
31 | | #include "common/status.h" |
32 | | #include "core/assert_cast.h" |
33 | | #include "core/block/block.h" |
34 | | #include "core/block/column_numbers.h" |
35 | | #include "core/block/column_with_type_and_name.h" |
36 | | #include "core/column/column.h" |
37 | | #include "core/column/column_array.h" |
38 | | #include "core/column/column_decimal.h" |
39 | | #include "core/column/column_nullable.h" |
40 | | #include "core/column/column_vector.h" |
41 | | #include "core/data_type/data_type.h" |
42 | | #include "core/data_type/data_type_array.h" |
43 | | #include "core/data_type/data_type_nullable.h" |
44 | | #include "core/data_type/data_type_number.h" |
45 | | #include "core/types.h" |
46 | | #include "exec/common/util.hpp" |
47 | | #include "exprs/aggregate/aggregate_function.h" |
48 | | #include "exprs/function/function.h" |
49 | | |
50 | | namespace doris { |
51 | | class FunctionContext; |
52 | | } // namespace doris |
53 | | |
54 | | namespace doris { |
55 | | |
56 | | class FunctionArrayDifference : public IFunction { |
57 | | public: |
58 | | static constexpr auto name = "array_difference"; |
59 | | |
60 | 2 | static FunctionPtr create() { return std::make_shared<FunctionArrayDifference>(); } |
61 | | |
62 | 1 | String get_name() const override { return name; } |
63 | | |
64 | 1 | bool is_variadic() const override { return false; } |
65 | | |
66 | 0 | size_t get_number_of_arguments() const override { return 1; } |
67 | | |
68 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
69 | 0 | DCHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY) |
70 | 0 | << "argument for function: " << name << " should be DataTypeArray but it has type " |
71 | 0 | << arguments[0]->get_name() << "."; |
72 | 0 | auto nested_type = assert_cast<const DataTypeArray&>(*(arguments[0])).get_nested_type(); |
73 | 0 | bool is_nullable = nested_type->is_nullable(); |
74 | | |
75 | | //return type is promoted to prevent result overflow |
76 | | //like: input is int32 ---> return type will be int64 |
77 | 0 | DataTypePtr return_type = nullptr; |
78 | 0 | switch (nested_type->get_primitive_type()) { |
79 | 0 | case TYPE_BOOLEAN: |
80 | 0 | case TYPE_TINYINT: |
81 | 0 | return_type = std::make_shared<DataTypeInt16>(); |
82 | 0 | break; |
83 | 0 | case TYPE_SMALLINT: |
84 | 0 | return_type = std::make_shared<DataTypeInt32>(); |
85 | 0 | break; |
86 | 0 | case TYPE_INT: |
87 | 0 | return_type = std::make_shared<DataTypeInt64>(); |
88 | 0 | break; |
89 | 0 | case TYPE_BIGINT: |
90 | 0 | case TYPE_LARGEINT: |
91 | 0 | return_type = std::make_shared<DataTypeInt128>(); |
92 | 0 | break; |
93 | 0 | case TYPE_FLOAT: |
94 | 0 | case TYPE_DOUBLE: |
95 | 0 | return_type = std::make_shared<DataTypeFloat64>(); |
96 | 0 | break; |
97 | 0 | case TYPE_DECIMAL32: |
98 | 0 | case TYPE_DECIMAL64: |
99 | 0 | case TYPE_DECIMALV2: |
100 | 0 | case TYPE_DECIMAL128I: |
101 | 0 | case TYPE_DECIMAL256: |
102 | 0 | return arguments[0]; |
103 | 0 | default: |
104 | 0 | break; |
105 | 0 | } |
106 | 0 | if (return_type) { |
107 | 0 | return std::make_shared<DataTypeArray>(is_nullable ? make_nullable(return_type) |
108 | 0 | : return_type); |
109 | 0 | } |
110 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, |
111 | 0 | "Function of {}, return type get wrong: and input argument is: {}", |
112 | 0 | name, arguments[0]->get_name()); |
113 | 0 | } |
114 | | |
115 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
116 | 0 | uint32_t result, size_t input_rows_count) const override { |
117 | 0 | const ColumnWithTypeAndName& arg = block.get_by_position(arguments[0]); |
118 | 0 | auto res_column = _execute_non_nullable(arg, input_rows_count); |
119 | 0 | if (!res_column) { |
120 | 0 | return Status::RuntimeError( |
121 | 0 | fmt::format("unsupported types for function {}({})", get_name(), |
122 | 0 | block.get_by_position(arguments[0]).type->get_name())); |
123 | 0 | } |
124 | 0 | DCHECK_EQ(arg.column->size(), res_column->size()); |
125 | 0 | block.replace_by_position(result, std::move(res_column)); |
126 | 0 | return Status::OK(); |
127 | 0 | } |
128 | | |
129 | | private: |
130 | | template <typename Element, typename Result> |
131 | | NO_SANITIZE_UNDEFINED static void impl(const Element* __restrict src, Result* __restrict dst, |
132 | 0 | size_t begin, size_t end) { |
133 | 0 | size_t curr_pos = begin; |
134 | 0 | if (curr_pos < end) { |
135 | 0 | Element prev_element = src[curr_pos]; |
136 | 0 | dst[curr_pos] = {}; |
137 | 0 | curr_pos++; |
138 | 0 | Element curr_element = src[curr_pos]; |
139 | 0 | for (; curr_pos < end; ++curr_pos) { |
140 | 0 | curr_element = src[curr_pos]; |
141 | 0 | dst[curr_pos] = |
142 | 0 | static_cast<Result>(curr_element) - static_cast<Result>(prev_element); |
143 | 0 | prev_element = curr_element; |
144 | 0 | } |
145 | 0 | } |
146 | 0 | } Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIhsEEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIasEEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIsiEEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIilEEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIlnEEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implInnEEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIfdEEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implIddEEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implINS_7DecimalIiEES3_EEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implINS_7DecimalIlEES3_EEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implINS_12Decimal128V3ES2_EEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implINS_14DecimalV2ValueES2_EEvPKT_PT0_mm Unexecuted instantiation: _ZN5doris23FunctionArrayDifference4implINS_7DecimalIN4wide7integerILm256EiEEEES6_EEvPKT_PT0_mm |
147 | | |
148 | | template <PrimitiveType Element, PrimitiveType Result> |
149 | | ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets, |
150 | | const IColumn& nested_column, |
151 | 0 | ColumnPtr nested_null_map) const { |
152 | 0 | using ColVecType = typename PrimitiveTypeTraits<Element>::ColumnType; |
153 | 0 | using ColVecResult = typename PrimitiveTypeTraits<Result>::ColumnType; |
154 | 0 | typename ColVecResult::MutablePtr res_nested = nullptr; |
155 | |
|
156 | 0 | const auto& src_data = reinterpret_cast<const ColVecType&>(nested_column).get_data(); |
157 | 0 | if constexpr (is_decimal(Result)) { |
158 | 0 | res_nested = ColVecResult::create(0, src_data.get_scale()); |
159 | 0 | } else { |
160 | 0 | res_nested = ColVecResult::create(); |
161 | 0 | } |
162 | 0 | auto size = nested_column.size(); |
163 | 0 | typename ColVecResult::Container& res_values = res_nested->get_data(); |
164 | 0 | res_values.resize(size); |
165 | |
|
166 | 0 | size_t pos = 0; |
167 | 0 | for (auto offset : offsets) { |
168 | 0 | impl(src_data.data(), res_values.data(), pos, offset); |
169 | 0 | pos = offset; |
170 | 0 | } |
171 | 0 | if (nested_null_map) { |
172 | 0 | auto null_map_col = ColumnUInt8::create(size, 0); |
173 | 0 | auto& null_map_col_data = null_map_col->get_data(); |
174 | 0 | auto nested_colum_data = static_cast<const ColumnUInt8*>(nested_null_map.get()); |
175 | 0 | VectorizedUtils::update_null_map(null_map_col_data, nested_colum_data->get_data()); |
176 | 0 | for (size_t row = 0; row < offsets.size(); ++row) { |
177 | 0 | auto off = offsets[row - 1]; |
178 | 0 | auto len = offsets[row] - off; |
179 | 0 | auto nested_pos = len ? len - 1 : 0; |
180 | 0 | for (; nested_pos > 0; --nested_pos) { |
181 | 0 | if (null_map_col_data[nested_pos + off - 1]) { |
182 | 0 | null_map_col_data[nested_pos + off] = 1; |
183 | 0 | } |
184 | 0 | } |
185 | 0 | } |
186 | 0 | return ColumnNullable::create(std::move(res_nested), std::move(null_map_col)); |
187 | 0 | } else { |
188 | 0 | return res_nested; |
189 | 0 | } |
190 | 0 | } Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE2ELS2_4EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE3ELS2_4EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE4ELS2_5EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE5ELS2_6EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE6ELS2_7EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE7ELS2_7EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE8ELS2_9EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE9ELS2_9EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE28ELS2_28EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE29ELS2_29EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE30ELS2_30EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE20ELS2_20EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ Unexecuted instantiation: _ZNK5doris23FunctionArrayDifference24_execute_number_expandedILNS_13PrimitiveTypeE35ELS2_35EEENS_3COWINS_7IColumnEE13immutable_ptrIS4_EERKNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKS4_S7_ |
191 | | |
192 | | ColumnPtr _execute_non_nullable(const ColumnWithTypeAndName& arg, |
193 | 0 | size_t input_rows_count) const { |
194 | | // check array nested column type and get data |
195 | 0 | auto left_column = arg.column->convert_to_full_column_if_const(); |
196 | 0 | const auto& array_column = reinterpret_cast<const ColumnArray&>(*left_column); |
197 | 0 | const auto& offsets = array_column.get_offsets(); |
198 | 0 | DCHECK(offsets.size() == input_rows_count); |
199 | |
|
200 | 0 | ColumnPtr nested_column = nullptr; |
201 | 0 | ColumnPtr nested_null_map = nullptr; |
202 | 0 | if (is_column_nullable(array_column.get_data())) { |
203 | 0 | const auto& nested_null_column = |
204 | 0 | reinterpret_cast<const ColumnNullable&>(array_column.get_data()); |
205 | 0 | nested_column = nested_null_column.get_nested_column_ptr(); |
206 | 0 | nested_null_map = nested_null_column.get_null_map_column_ptr(); |
207 | 0 | } else { |
208 | 0 | nested_column = array_column.get_data_ptr(); |
209 | 0 | } |
210 | |
|
211 | 0 | ColumnPtr res = nullptr; |
212 | 0 | auto left_element_type = |
213 | 0 | remove_nullable(assert_cast<const DataTypeArray&>(*arg.type).get_nested_type()); |
214 | 0 | switch (left_element_type->get_primitive_type()) { |
215 | 0 | case TYPE_BOOLEAN: |
216 | 0 | res = _execute_number_expanded<TYPE_BOOLEAN, TYPE_SMALLINT>(offsets, *nested_column, |
217 | 0 | nested_null_map); |
218 | 0 | break; |
219 | 0 | case TYPE_TINYINT: |
220 | 0 | res = _execute_number_expanded<TYPE_TINYINT, TYPE_SMALLINT>(offsets, *nested_column, |
221 | 0 | nested_null_map); |
222 | 0 | break; |
223 | 0 | case TYPE_SMALLINT: |
224 | 0 | res = _execute_number_expanded<TYPE_SMALLINT, TYPE_INT>(offsets, *nested_column, |
225 | 0 | nested_null_map); |
226 | 0 | break; |
227 | 0 | case TYPE_INT: |
228 | 0 | res = _execute_number_expanded<TYPE_INT, TYPE_BIGINT>(offsets, *nested_column, |
229 | 0 | nested_null_map); |
230 | 0 | break; |
231 | 0 | case TYPE_BIGINT: |
232 | 0 | res = _execute_number_expanded<TYPE_BIGINT, TYPE_LARGEINT>(offsets, *nested_column, |
233 | 0 | nested_null_map); |
234 | 0 | break; |
235 | 0 | case TYPE_LARGEINT: |
236 | 0 | res = _execute_number_expanded<TYPE_LARGEINT, TYPE_LARGEINT>(offsets, *nested_column, |
237 | 0 | nested_null_map); |
238 | 0 | break; |
239 | 0 | case TYPE_FLOAT: |
240 | 0 | res = _execute_number_expanded<TYPE_FLOAT, TYPE_DOUBLE>(offsets, *nested_column, |
241 | 0 | nested_null_map); |
242 | 0 | break; |
243 | 0 | case TYPE_DOUBLE: |
244 | 0 | res = _execute_number_expanded<TYPE_DOUBLE, TYPE_DOUBLE>(offsets, *nested_column, |
245 | 0 | nested_null_map); |
246 | 0 | break; |
247 | 0 | case TYPE_DECIMAL32: |
248 | 0 | res = _execute_number_expanded<TYPE_DECIMAL32, TYPE_DECIMAL32>(offsets, *nested_column, |
249 | 0 | nested_null_map); |
250 | 0 | break; |
251 | 0 | case TYPE_DECIMAL64: |
252 | 0 | res = _execute_number_expanded<TYPE_DECIMAL64, TYPE_DECIMAL64>(offsets, *nested_column, |
253 | 0 | nested_null_map); |
254 | 0 | break; |
255 | 0 | case TYPE_DECIMAL128I: |
256 | 0 | res = _execute_number_expanded<TYPE_DECIMAL128I, TYPE_DECIMAL128I>( |
257 | 0 | offsets, *nested_column, nested_null_map); |
258 | 0 | break; |
259 | 0 | case TYPE_DECIMALV2: |
260 | 0 | res = _execute_number_expanded<TYPE_DECIMALV2, TYPE_DECIMALV2>(offsets, *nested_column, |
261 | 0 | nested_null_map); |
262 | 0 | break; |
263 | 0 | case TYPE_DECIMAL256: |
264 | 0 | res = _execute_number_expanded<TYPE_DECIMAL256, TYPE_DECIMAL256>( |
265 | 0 | offsets, *nested_column, nested_null_map); |
266 | 0 | break; |
267 | 0 | default: |
268 | 0 | return nullptr; |
269 | 0 | } |
270 | 0 | return ColumnArray::create(res, array_column.get_offsets_ptr()); |
271 | 0 | } |
272 | | }; |
273 | | |
274 | | } // namespace doris |