/root/doris/be/src/vec/functions/function.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/IFunction.cpp |
19 | | // and modified by Doris |
20 | | |
21 | | #include "vec/functions/function.h" |
22 | | |
23 | | #include <algorithm> |
24 | | #include <memory> |
25 | | #include <numeric> |
26 | | #include <vector> |
27 | | |
28 | | #include "vec/aggregate_functions/aggregate_function.h" |
29 | | #include "vec/columns/column.h" |
30 | | #include "vec/columns/column_const.h" |
31 | | #include "vec/columns/column_nullable.h" |
32 | | #include "vec/columns/column_vector.h" |
33 | | #include "vec/columns/columns_number.h" |
34 | | #include "vec/common/assert_cast.h" |
35 | | #include "vec/core/field.h" |
36 | | #include "vec/data_types/data_type_array.h" |
37 | | #include "vec/data_types/data_type_nothing.h" |
38 | | #include "vec/data_types/data_type_nullable.h" |
39 | | #include "vec/functions/function_helpers.h" |
40 | | #include "vec/utils/util.hpp" |
41 | | |
42 | | namespace doris::vectorized { |
43 | | |
44 | | ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const ColumnNumbers& args, |
45 | 150k | size_t result, size_t input_rows_count) { |
46 | 150k | ColumnPtr result_null_map_column; |
47 | | /// If result is already nullable. |
48 | 150k | ColumnPtr src_not_nullable = src; |
49 | 150k | MutableColumnPtr mutable_result_null_map_column; |
50 | | |
51 | 150k | if (const auto* nullable = check_and_get_column<ColumnNullable>(*src)) { Branch (51:21): [True: 69.7k, False: 80.5k]
|
52 | 69.7k | src_not_nullable = nullable->get_nested_column_ptr(); |
53 | 69.7k | result_null_map_column = nullable->get_null_map_column_ptr(); |
54 | 69.7k | } |
55 | | |
56 | 160k | for (const auto& arg : args) { Branch (56:26): [True: 160k, False: 150k]
|
57 | 160k | const ColumnWithTypeAndName& elem = block.get_by_position(arg); |
58 | 160k | if (!elem.type->is_nullable() || is_column_const(*elem.column)) { Branch (58:13): [True: 133, False: 160k]
Branch (58:42): [True: 6.41k, False: 153k]
|
59 | 6.55k | continue; |
60 | 6.55k | } |
61 | | |
62 | 153k | if (const auto* nullable = assert_cast<const ColumnNullable*>(elem.column.get()); |
63 | 153k | nullable->has_null()) { Branch (63:13): [True: 1.25k, False: 152k]
|
64 | 1.25k | const ColumnPtr& null_map_column = nullable->get_null_map_column_ptr(); |
65 | 1.25k | if (!result_null_map_column) { // NOLINT(bugprone-use-after-move) Branch (65:17): [True: 583, False: 674]
|
66 | 583 | result_null_map_column = null_map_column->clone_resized(input_rows_count); |
67 | 583 | continue; |
68 | 583 | } |
69 | | |
70 | 674 | if (!mutable_result_null_map_column) { Branch (70:17): [True: 652, False: 22]
|
71 | 652 | mutable_result_null_map_column = |
72 | 652 | std::move(result_null_map_column)->assume_mutable(); |
73 | 652 | } |
74 | | |
75 | 674 | NullMap& result_null_map = |
76 | 674 | assert_cast<ColumnUInt8&>(*mutable_result_null_map_column).get_data(); |
77 | 674 | const NullMap& src_null_map = |
78 | 674 | assert_cast<const ColumnUInt8&>(*null_map_column).get_data(); |
79 | | |
80 | 674 | VectorizedUtils::update_null_map(result_null_map, src_null_map); |
81 | 674 | } |
82 | 153k | } |
83 | | |
84 | 150k | if (!result_null_map_column) { Branch (84:9): [True: 79.9k, False: 70.3k]
|
85 | 79.9k | if (is_column_const(*src)) { Branch (85:13): [True: 0, False: 79.9k]
|
86 | 0 | return ColumnConst::create( |
87 | 0 | make_nullable(assert_cast<const ColumnConst&>(*src).get_data_column_ptr(), |
88 | 0 | false), |
89 | 0 | input_rows_count); |
90 | 0 | } |
91 | 79.9k | return ColumnNullable::create(src, ColumnUInt8::create(input_rows_count, 0)); |
92 | 79.9k | } |
93 | | |
94 | 70.3k | return ColumnNullable::create(src_not_nullable, result_null_map_column); |
95 | 150k | } |
96 | | |
97 | 17.4k | bool have_null_column(const Block& block, const ColumnNumbers& args) { |
98 | 27.4k | return std::ranges::any_of(args, [&block](const auto& elem) { |
99 | 27.4k | return block.get_by_position(elem).type->is_nullable(); |
100 | 27.4k | }); |
101 | 17.4k | } |
102 | | |
103 | 14.3k | bool have_null_column(const ColumnsWithTypeAndName& args) { |
104 | 14.3k | return std::ranges::any_of(args, [](const auto& elem) { return elem.type->is_nullable(); }); |
105 | 14.3k | } |
106 | | |
107 | | inline Status PreparedFunctionImpl::_execute_skipped_constant_deal( |
108 | | FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, |
109 | 163k | size_t input_rows_count, bool dry_run) const { |
110 | 163k | bool executed = false; |
111 | 163k | RETURN_IF_ERROR(default_implementation_for_nulls(context, block, args, result, input_rows_count, |
112 | 163k | dry_run, &executed)); |
113 | 163k | if (executed) { Branch (113:9): [True: 9.45k, False: 154k]
|
114 | 9.45k | return Status::OK(); |
115 | 9.45k | } |
116 | | |
117 | 154k | if (dry_run) { Branch (117:9): [True: 0, False: 154k]
|
118 | 0 | return execute_impl_dry_run(context, block, args, result, input_rows_count); |
119 | 154k | } else { |
120 | 154k | return execute_impl(context, block, args, result, input_rows_count); |
121 | 154k | } |
122 | 154k | } |
123 | | |
124 | | Status PreparedFunctionImpl::default_implementation_for_constant_arguments( |
125 | | FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, |
126 | 163k | size_t input_rows_count, bool dry_run, bool* executed) const { |
127 | 163k | *executed = false; |
128 | 163k | ColumnNumbers args_expect_const = get_arguments_that_are_always_constant(); |
129 | | |
130 | | // Check that these arguments are really constant. |
131 | 163k | for (auto arg_num : args_expect_const) { Branch (131:23): [True: 143k, False: 163k]
|
132 | 143k | if (arg_num < args.size() && Branch (132:13): [True: 150, False: 143k]
|
133 | 143k | !is_column_const(*block.get_by_position(args[arg_num]).column)) { Branch (133:13): [True: 0, False: 150]
|
134 | 0 | return Status::InvalidArgument("Argument at index {} for function {} must be constant", |
135 | 0 | arg_num, get_name()); |
136 | 0 | } |
137 | 143k | } |
138 | | |
139 | 163k | if (args.empty() || !use_default_implementation_for_constants() || Branch (139:9): [True: 0, False: 163k]
Branch (139:25): [True: 11, False: 163k]
|
140 | 163k | !VectorizedUtils::all_arguments_are_constant(block, args)) { Branch (140:9): [True: 161k, False: 2.15k]
|
141 | 161k | return Status::OK(); |
142 | 161k | } |
143 | | |
144 | | // now all columns are const. |
145 | 2.15k | Block temporary_block; |
146 | | |
147 | 2.15k | size_t arguments_size = args.size(); |
148 | 7.26k | for (size_t arg_num = 0; arg_num < arguments_size; ++arg_num) { Branch (148:30): [True: 5.11k, False: 2.15k]
|
149 | 5.11k | const ColumnWithTypeAndName& column = block.get_by_position(args[arg_num]); |
150 | | // Columns in const_list --> column_const, others --> nested_column |
151 | | // that's because some functions supposes some specific columns always constant. |
152 | | // If we unpack it, there will be unnecessary cost of virtual judge. |
153 | 5.11k | if (args_expect_const.end() != Branch (153:13): [True: 0, False: 5.11k]
|
154 | 5.11k | std::find(args_expect_const.begin(), args_expect_const.end(), arg_num)) { |
155 | 0 | temporary_block.insert({column.column, column.type, column.name}); |
156 | 5.11k | } else { |
157 | 5.11k | temporary_block.insert( |
158 | 5.11k | {assert_cast<const ColumnConst*>(column.column.get())->get_data_column_ptr(), |
159 | 5.11k | column.type, column.name}); |
160 | 5.11k | } |
161 | 5.11k | } |
162 | | |
163 | 2.15k | temporary_block.insert(block.get_by_position(result)); |
164 | | |
165 | 2.15k | ColumnNumbers temporary_argument_numbers(arguments_size); |
166 | 7.26k | for (size_t i = 0; i < arguments_size; ++i) { Branch (166:24): [True: 5.11k, False: 2.15k]
|
167 | 5.11k | temporary_argument_numbers[i] = i; |
168 | 5.11k | } |
169 | | |
170 | 2.15k | RETURN_IF_ERROR(_execute_skipped_constant_deal(context, temporary_block, |
171 | 2.15k | temporary_argument_numbers, arguments_size, |
172 | 2.15k | temporary_block.rows(), dry_run)); |
173 | | |
174 | 2.15k | ColumnPtr result_column; |
175 | | /// extremely rare case, when we have function with completely const arguments |
176 | | /// but some of them produced by non is_deterministic function |
177 | 2.15k | if (temporary_block.get_by_position(arguments_size).column->size() > 1) { Branch (177:9): [True: 0, False: 2.15k]
|
178 | 0 | result_column = temporary_block.get_by_position(arguments_size).column->clone_resized(1); |
179 | 2.15k | } else { |
180 | 2.15k | result_column = temporary_block.get_by_position(arguments_size).column; |
181 | 2.15k | } |
182 | | // We shuold handle the case where the result column is also a ColumnConst. |
183 | 2.15k | block.get_by_position(result).column = ColumnConst::create(result_column, input_rows_count); |
184 | 2.15k | *executed = true; |
185 | 2.15k | return Status::OK(); |
186 | 2.15k | } |
187 | | |
188 | | Status PreparedFunctionImpl::default_implementation_for_nulls( |
189 | | FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, |
190 | 163k | size_t input_rows_count, bool dry_run, bool* executed) const { |
191 | 163k | *executed = false; |
192 | 163k | if (args.empty() || !use_default_implementation_for_nulls()) { Branch (192:9): [True: 0, False: 163k]
Branch (192:25): [True: 143k, False: 20.6k]
|
193 | 143k | return Status::OK(); |
194 | 143k | } |
195 | | |
196 | 43.2k | if (std::ranges::any_of(args, [&block](const auto& elem) { Branch (196:9): [True: 3.20k, False: 17.4k]
|
197 | 43.2k | return block.get_by_position(elem).column->only_null(); |
198 | 43.2k | })) { |
199 | 3.20k | block.get_by_position(result).column = |
200 | 3.20k | block.get_by_position(result).type->create_column_const(input_rows_count, Null()); |
201 | 3.20k | *executed = true; |
202 | 3.20k | return Status::OK(); |
203 | 3.20k | } |
204 | | |
205 | 17.4k | if (have_null_column(block, args)) { Branch (205:9): [True: 6.25k, False: 11.1k]
|
206 | 6.25k | bool need_to_default = need_replace_null_data_to_default(); |
207 | 6.25k | if (context) { Branch (207:13): [True: 6.24k, False: 3]
|
208 | 6.24k | need_to_default &= context->check_overflow_for_decimal(); |
209 | 6.24k | } |
210 | | // extract nested column from nulls |
211 | 6.25k | ColumnNumbers new_args; |
212 | 16.2k | for (auto arg : args) { Branch (212:23): [True: 16.2k, False: 6.25k]
|
213 | 16.2k | new_args.push_back(block.columns()); |
214 | 16.2k | block.insert(block.get_by_position(arg).get_nested(need_to_default)); |
215 | 16.2k | DCHECK(!block.get_by_position(new_args.back()).column->is_nullable()); |
216 | 16.2k | } |
217 | 6.25k | RETURN_IF_ERROR(execute_without_low_cardinality_columns(context, block, new_args, result, |
218 | 6.25k | block.rows(), dry_run)); |
219 | | // After run with nested, wrap them in null. Before this, block.get_by_position(result).type |
220 | | // is not compatible with get_by_position(result).column |
221 | 6.25k | block.get_by_position(result).column = wrap_in_nullable( |
222 | 6.25k | block.get_by_position(result).column, block, args, result, input_rows_count); |
223 | | |
224 | 22.5k | while (!new_args.empty()) { Branch (224:16): [True: 16.2k, False: 6.25k]
|
225 | 16.2k | block.erase(new_args.back()); |
226 | 16.2k | new_args.pop_back(); |
227 | 16.2k | } |
228 | 6.25k | *executed = true; |
229 | 6.25k | return Status::OK(); |
230 | 6.25k | } |
231 | 11.1k | return Status::OK(); |
232 | 17.4k | } |
233 | | |
234 | | Status PreparedFunctionImpl::execute_without_low_cardinality_columns( |
235 | | FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, |
236 | 163k | size_t input_rows_count, bool dry_run) const { |
237 | 163k | bool executed = false; |
238 | | |
239 | 163k | RETURN_IF_ERROR(default_implementation_for_constant_arguments( |
240 | 163k | context, block, args, result, input_rows_count, dry_run, &executed)); |
241 | 163k | if (executed) { Branch (241:9): [True: 2.15k, False: 161k]
|
242 | 2.15k | return Status::OK(); |
243 | 2.15k | } |
244 | | |
245 | 161k | return _execute_skipped_constant_deal(context, block, args, result, input_rows_count, dry_run); |
246 | 163k | } |
247 | | |
248 | | Status PreparedFunctionImpl::execute(FunctionContext* context, Block& block, |
249 | | const ColumnNumbers& args, size_t result, |
250 | 157k | size_t input_rows_count, bool dry_run) const { |
251 | 157k | return execute_without_low_cardinality_columns(context, block, args, result, input_rows_count, |
252 | 157k | dry_run); |
253 | 157k | } |
254 | | |
255 | 163k | void FunctionBuilderImpl::check_number_of_arguments(size_t number_of_arguments) const { |
256 | 163k | if (is_variadic()) { Branch (256:9): [True: 6.22k, False: 157k]
|
257 | 6.22k | return; |
258 | 6.22k | } |
259 | | |
260 | 157k | size_t expected_number_of_arguments = get_number_of_arguments(); |
261 | | |
262 | 157k | CHECK_EQ(number_of_arguments, expected_number_of_arguments) << fmt::format( |
263 | 0 | "Number of arguments for function {} doesn't match: passed {} , should be {}", |
264 | 0 | get_name(), number_of_arguments, expected_number_of_arguments); |
265 | 157k | } |
266 | | |
267 | | DataTypePtr FunctionBuilderImpl::get_return_type_without_low_cardinality( |
268 | 163k | const ColumnsWithTypeAndName& arguments) const { |
269 | 163k | check_number_of_arguments(arguments.size()); |
270 | | |
271 | 163k | if (!arguments.empty() && use_default_implementation_for_nulls()) { Branch (271:9): [True: 163k, False: 2]
Branch (271:31): [True: 14.3k, False: 149k]
|
272 | 14.3k | if (have_null_column(arguments)) { Branch (272:13): [True: 9.47k, False: 4.90k]
|
273 | 9.47k | ColumnNumbers numbers(arguments.size()); |
274 | 9.47k | std::iota(numbers.begin(), numbers.end(), 0); |
275 | 9.47k | auto [nested_block, _] = |
276 | 9.47k | create_block_with_nested_columns(Block(arguments), numbers, false); |
277 | 9.47k | auto return_type = get_return_type_impl( |
278 | 9.47k | ColumnsWithTypeAndName(nested_block.begin(), nested_block.end())); |
279 | 9.47k | return make_nullable(return_type); |
280 | 9.47k | } |
281 | 14.3k | } |
282 | | |
283 | 154k | return get_return_type_impl(arguments); |
284 | 163k | } |
285 | | |
286 | 163k | DataTypePtr FunctionBuilderImpl::get_return_type(const ColumnsWithTypeAndName& arguments) const { |
287 | 163k | if (use_default_implementation_for_low_cardinality_columns()) { Branch (287:9): [True: 16.3k, False: 147k]
|
288 | 16.3k | ColumnsWithTypeAndName args_without_low_cardinality(arguments); |
289 | | |
290 | 36.0k | for (ColumnWithTypeAndName& arg : args_without_low_cardinality) { Branch (290:41): [True: 36.0k, False: 16.3k]
|
291 | 36.0k | bool is_const = arg.column && is_column_const(*arg.column); Branch (291:29): [True: 33.9k, False: 2.08k]
Branch (291:43): [True: 17.6k, False: 16.3k]
|
292 | 36.0k | if (is_const) { Branch (292:17): [True: 17.6k, False: 18.4k]
|
293 | 17.6k | arg.column = assert_cast<const ColumnConst&>(*arg.column).remove_low_cardinality(); |
294 | 17.6k | } |
295 | 36.0k | } |
296 | | |
297 | 16.3k | auto type_without_low_cardinality = |
298 | 16.3k | get_return_type_without_low_cardinality(args_without_low_cardinality); |
299 | | |
300 | 16.3k | return type_without_low_cardinality; |
301 | 16.3k | } |
302 | | |
303 | 147k | return get_return_type_without_low_cardinality(arguments); |
304 | 163k | } |
305 | | |
306 | | bool FunctionBuilderImpl::is_date_or_datetime_or_decimal( |
307 | 3 | const DataTypePtr& return_type, const DataTypePtr& func_return_type) const { |
308 | 3 | return (is_date_or_datetime(return_type->is_nullable() Branch (308:13): [True: 2, False: 1]
Branch (308:33): [True: 2, False: 1]
|
309 | 3 | ? ((DataTypeNullable*)return_type.get())->get_nested_type() |
310 | 3 | : return_type) && |
311 | 3 | is_date_or_datetime( Branch (311:13): [True: 2, False: 0]
|
312 | 2 | func_return_type->is_nullable() Branch (312:21): [True: 2, False: 0]
|
313 | 2 | ? ((DataTypeNullable*)func_return_type.get())->get_nested_type() |
314 | 2 | : func_return_type)) || |
315 | 3 | (is_date_v2_or_datetime_v2( Branch (315:13): [True: 0, False: 1]
|
316 | 1 | return_type->is_nullable() Branch (316:21): [True: 0, False: 1]
|
317 | 1 | ? ((DataTypeNullable*)return_type.get())->get_nested_type() |
318 | 1 | : return_type) && |
319 | 1 | is_date_v2_or_datetime_v2( Branch (319:13): [True: 0, False: 0]
|
320 | 0 | func_return_type->is_nullable() Branch (320:21): [True: 0, False: 0]
|
321 | 0 | ? ((DataTypeNullable*)func_return_type.get())->get_nested_type() |
322 | 0 | : func_return_type)) || |
323 | | // For some date functions such as str_to_date(string, string), return_type will |
324 | | // be datetimev2 if users enable datev2 but get_return_type(arguments) will still |
325 | | // return datetime. We need keep backward compatibility here. |
326 | 3 | (is_date_v2_or_datetime_v2( Branch (326:13): [True: 0, False: 1]
|
327 | 1 | return_type->is_nullable() Branch (327:21): [True: 0, False: 1]
|
328 | 1 | ? ((DataTypeNullable*)return_type.get())->get_nested_type() |
329 | 1 | : return_type) && |
330 | 1 | is_date_or_datetime( Branch (330:13): [True: 0, False: 0]
|
331 | 0 | func_return_type->is_nullable() Branch (331:21): [True: 0, False: 0]
|
332 | 0 | ? ((DataTypeNullable*)func_return_type.get())->get_nested_type() |
333 | 0 | : func_return_type)) || |
334 | 3 | (is_date_or_datetime(return_type->is_nullable() Branch (334:13): [True: 0, False: 1]
Branch (334:33): [True: 0, False: 1]
|
335 | 1 | ? ((DataTypeNullable*)return_type.get())->get_nested_type() |
336 | 1 | : return_type) && |
337 | 1 | is_date_v2_or_datetime_v2( Branch (337:13): [True: 0, False: 0]
|
338 | 0 | func_return_type->is_nullable() Branch (338:21): [True: 0, False: 0]
|
339 | 0 | ? ((DataTypeNullable*)func_return_type.get())->get_nested_type() |
340 | 0 | : func_return_type)) || |
341 | 3 | (is_decimal(return_type->is_nullable() Branch (341:13): [True: 0, False: 1]
Branch (341:24): [True: 0, False: 1]
|
342 | 1 | ? ((DataTypeNullable*)return_type.get())->get_nested_type() |
343 | 1 | : return_type) && |
344 | 1 | is_decimal(func_return_type->is_nullable() Branch (344:13): [True: 0, False: 0]
Branch (344:24): [True: 0, False: 0]
|
345 | 0 | ? ((DataTypeNullable*)func_return_type.get())->get_nested_type() |
346 | 0 | : func_return_type)); |
347 | 3 | } |
348 | | |
349 | | bool FunctionBuilderImpl::is_array_nested_type_date_or_datetime_or_decimal( |
350 | 1 | const DataTypePtr& return_type, const DataTypePtr& func_return_type) const { |
351 | 1 | auto return_type_ptr = return_type->is_nullable() Branch (351:28): [True: 0, False: 1]
|
352 | 1 | ? ((DataTypeNullable*)return_type.get())->get_nested_type() |
353 | 1 | : return_type; |
354 | 1 | auto func_return_type_ptr = |
355 | 1 | func_return_type->is_nullable() Branch (355:13): [True: 1, False: 0]
|
356 | 1 | ? ((DataTypeNullable*)func_return_type.get())->get_nested_type() |
357 | 1 | : func_return_type; |
358 | 1 | if (!(is_array(return_type_ptr) && is_array(func_return_type_ptr))) { Branch (358:11): [True: 0, False: 1]
Branch (358:40): [True: 0, False: 0]
|
359 | 1 | return false; |
360 | 1 | } |
361 | 0 | auto nested_nullable_return_type_ptr = |
362 | 0 | (assert_cast<const DataTypeArray*>(return_type_ptr.get()))->get_nested_type(); |
363 | 0 | auto nested_nullable_func_return_type = |
364 | 0 | (assert_cast<const DataTypeArray*>(func_return_type_ptr.get()))->get_nested_type(); |
365 | | // There must be nullable inside array type. |
366 | 0 | if (nested_nullable_return_type_ptr->is_nullable() && Branch (366:9): [True: 0, False: 0]
|
367 | 0 | nested_nullable_func_return_type->is_nullable()) { Branch (367:9): [True: 0, False: 0]
|
368 | 0 | auto nested_return_type_ptr = |
369 | 0 | ((DataTypeNullable*)(nested_nullable_return_type_ptr.get()))->get_nested_type(); |
370 | 0 | auto nested_func_return_type_ptr = |
371 | 0 | ((DataTypeNullable*)(nested_nullable_func_return_type.get()))->get_nested_type(); |
372 | 0 | return is_date_or_datetime_or_decimal(nested_return_type_ptr, nested_func_return_type_ptr); |
373 | 0 | } |
374 | 0 | return false; |
375 | 0 | } |
376 | | } // namespace doris::vectorized |