Coverage Report

Created: 2025-04-27 14:25

/root/doris/be/src/vec/functions/function.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/IFunction.cpp
19
// and modified by Doris
20
21
#include "vec/functions/function.h"
22
23
#include <algorithm>
24
#include <memory>
25
#include <numeric>
26
#include <vector>
27
28
#include "vec/aggregate_functions/aggregate_function.h"
29
#include "vec/columns/column.h"
30
#include "vec/columns/column_const.h"
31
#include "vec/columns/column_nullable.h"
32
#include "vec/columns/column_vector.h"
33
#include "vec/columns/columns_number.h"
34
#include "vec/common/assert_cast.h"
35
#include "vec/core/field.h"
36
#include "vec/data_types/data_type_array.h"
37
#include "vec/data_types/data_type_nothing.h"
38
#include "vec/data_types/data_type_nullable.h"
39
#include "vec/functions/function_helpers.h"
40
#include "vec/utils/util.hpp"
41
42
namespace doris::vectorized {
43
44
/// Wraps the function result `src` into a nullable column.
///
/// The null map of the returned column starts from the null map of `src` (when `src`
/// is already a ColumnNullable) and is then combined with the null map of every
/// argument that has a nullable type, is not a constant column and reports has_null().
///
/// @param src              result column to wrap
/// @param block            block that holds the argument columns
/// @param args             positions of the arguments inside `block`
/// @param result           position of the result column (not read by this function)
/// @param input_rows_count row count used when a null map has to be created or cloned
/// @return a nullable column, or a ColumnConst over a nullable column when `src` is
///         constant and no null map was collected
ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const ColumnNumbers& args,
                           size_t result, size_t input_rows_count) {
    ColumnPtr merged_null_map;
    ColumnPtr nested_result = src;
    MutableColumnPtr writable_null_map;

    // A result that is already nullable contributes its own nested column and null map.
    if (const auto* src_nullable = check_and_get_column<ColumnNullable>(*src)) {
        nested_result = src_nullable->get_nested_column_ptr();
        merged_null_map = src_nullable->get_null_map_column_ptr();
    }

    for (const auto& position : args) {
        const ColumnWithTypeAndName& argument = block.get_by_position(position);

        // Only nullable, non-constant arguments can contribute nulls.
        const bool may_contribute =
                argument.type->is_nullable() && !is_column_const(*argument.column);
        if (!may_contribute) {
            continue;
        }

        const auto* arg_nullable = assert_cast<const ColumnNullable*>(argument.column.get());
        if (!arg_nullable->has_null()) {
            continue;
        }

        const ColumnPtr& arg_null_map = arg_nullable->get_null_map_column_ptr();

        // First contributor: start from a resized copy of its null map.
        if (!merged_null_map) { // NOLINT(bugprone-use-after-move)
            merged_null_map = arg_null_map->clone_resized(input_rows_count);
            continue;
        }

        // Later contributors are folded into the map collected so far.
        // NOTE(review): merged_null_map is tested again after this std::move (hence the
        // NOLINT above); presumably the expression does not reset it -- confirm.
        if (!writable_null_map) {
            writable_null_map = std::move(merged_null_map)->assume_mutable();
        }

        NullMap& target_flags = assert_cast<ColumnUInt8&>(*writable_null_map).get_data();
        const NullMap& source_flags = assert_cast<const ColumnUInt8&>(*arg_null_map).get_data();
        VectorizedUtils::update_null_map(target_flags, source_flags);
    }

    if (merged_null_map) {
        return ColumnNullable::create(nested_result, merged_null_map);
    }

    // Nothing contributed a null map.
    if (is_column_const(*src)) {
        // Keep the result constant: make its data column nullable and re-wrap it.
        return ColumnConst::create(
                make_nullable(assert_cast<const ColumnConst&>(*src).get_data_column_ptr(), false),
                input_rows_count);
    }
    // Attach a null map of input_rows_count zeros.
    return ColumnNullable::create(src, ColumnUInt8::create(input_rows_count, 0));
}
96
97
14.9k
bool have_null_column(const Block& block, const ColumnNumbers& args) {
98
24.5k
    return std::ranges::any_of(args, [&block](const auto& elem) {
99
24.5k
        return block.get_by_position(elem).type->is_nullable();
100
24.5k
    });
101
14.9k
}
102
103
12.1k
bool have_null_column(const ColumnsWithTypeAndName& args) {
104
12.1k
    return std::ranges::any_of(args, [](const auto& elem) { return elem.type->is_nullable(); });
105
12.1k
}
106
107
/// Runs the function without the constant-argument handling step.
///
/// First gives default_implementation_for_nulls() the chance to produce the result;
/// if it did not, dispatches to execute_impl_dry_run() or execute_impl() depending
/// on `dry_run`. Any error status from those calls is returned to the caller.
inline Status PreparedFunctionImpl::_execute_skipped_constant_deal(
        FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result,
        size_t input_rows_count, bool dry_run) const {
    bool handled_by_null_path = false;
    RETURN_IF_ERROR(default_implementation_for_nulls(context, block, args, result, input_rows_count,
                                                     dry_run, &handled_by_null_path));
    if (handled_by_null_path) {
        return Status::OK();
    }

    if (!dry_run) {
        return execute_impl(context, block, args, result, input_rows_count);
    }
    return execute_impl_dry_run(context, block, args, result, input_rows_count);
}
123
124
/// Default handling for calls whose arguments are all constant columns.
///
/// Steps:
///  1. Every argument index reported by get_arguments_that_are_always_constant() that
///     is within `args` must refer to a constant column, otherwise InvalidArgument is
///     returned.
///  2. If `args` is empty, the function opts out via
///     use_default_implementation_for_constants(), or not all arguments are constant,
///     nothing is done and `*executed` stays false.
///  3. Otherwise the function is evaluated on a temporary block and the outcome is
///     stored in `block` at `result` as a ColumnConst of `input_rows_count` rows;
///     `*executed` is set to true.
///
/// @param executed out-parameter; set to true only when this function produced the result
Status PreparedFunctionImpl::default_implementation_for_constant_arguments(
        FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result,
        size_t input_rows_count, bool dry_run, bool* executed) const {
    *executed = false;
    const ColumnNumbers always_const_args = get_arguments_that_are_always_constant();

    // Arguments the function declares as always-constant must really be constant.
    for (auto arg_num : always_const_args) {
        if (arg_num >= args.size()) {
            continue;
        }
        if (!is_column_const(*block.get_by_position(args[arg_num]).column)) {
            return Status::InvalidArgument("Argument at index {} for function {} must be constant",
                                           arg_num, get_name());
        }
    }

    if (args.empty() || !use_default_implementation_for_constants() ||
        !VectorizedUtils::all_arguments_are_constant(block, args)) {
        return Status::OK();
    }

    // From here on every argument column is a ColumnConst.
    const size_t num_args = args.size();
    Block unwrapped_block;

    for (size_t idx = 0; idx < num_args; ++idx) {
        const ColumnWithTypeAndName& argument = block.get_by_position(args[idx]);
        // Arguments listed as always-constant stay wrapped in ColumnConst because the
        // function expects them that way; all others are replaced by their data column.
        const bool keep_const = std::find(always_const_args.begin(), always_const_args.end(),
                                          idx) != always_const_args.end();
        if (keep_const) {
            unwrapped_block.insert({argument.column, argument.type, argument.name});
        } else {
            unwrapped_block.insert(
                    {assert_cast<const ColumnConst*>(argument.column.get())->get_data_column_ptr(),
                     argument.type, argument.name});
        }
    }

    // The result slot is appended right after the arguments, i.e. at position num_args.
    unwrapped_block.insert(block.get_by_position(result));

    // In the temporary block the arguments occupy positions 0 .. num_args - 1.
    ColumnNumbers unwrapped_args(num_args);
    std::iota(unwrapped_args.begin(), unwrapped_args.end(), 0);

    RETURN_IF_ERROR(_execute_skipped_constant_deal(context, unwrapped_block, unwrapped_args,
                                                   num_args, unwrapped_block.rows(), dry_run));

    // Extremely rare case: a function with completely constant arguments, some of which
    // were produced by a non-deterministic function, may yield more than one row.
    // Only a single row is kept.
    ColumnPtr single_row = unwrapped_block.get_by_position(num_args).column;
    if (single_row->size() > 1) {
        single_row = single_row->clone_resized(1);
    }

    // TODO: handle the case where the result column is itself a ColumnConst.
    block.get_by_position(result).column = ColumnConst::create(single_row, input_rows_count);
    *executed = true;
    return Status::OK();
}
187
188
/// Default handling of nullable arguments.
///
/// Does nothing (leaving `*executed` false) when `args` is empty, when the function
/// opts out via use_default_implementation_for_nulls(), or when no argument has a
/// nullable type. Otherwise:
///  - if any argument column reports only_null(), the result becomes a constant Null
///    column of `input_rows_count` rows;
///  - else the function is executed on the nested columns of the arguments and the
///    outcome is wrapped back into a nullable column with wrap_in_nullable().
///
/// @param executed out-parameter; set to true only when this function produced the result
Status PreparedFunctionImpl::default_implementation_for_nulls(
        FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result,
        size_t input_rows_count, bool dry_run, bool* executed) const {
    *executed = false;
    if (args.empty() || !use_default_implementation_for_nulls()) {
        return Status::OK();
    }

    const auto is_only_null = [&block](const auto& position) {
        return block.get_by_position(position).column->only_null();
    };
    if (std::ranges::any_of(args, is_only_null)) {
        auto& result_slot = block.get_by_position(result);
        result_slot.column = result_slot.type->create_column_const(input_rows_count, Null());
        *executed = true;
        return Status::OK();
    }

    if (!have_null_column(block, args)) {
        return Status::OK();
    }

    bool replace_null_data = need_replace_null_data_to_default();
    if (context) {
        replace_null_data &= context->check_overflow_for_decimal();
    }

    // Append the nested (non-nullable) version of every argument to the end of the
    // block and remember the positions of these temporary columns.
    ColumnNumbers nested_args;
    for (auto position : args) {
        nested_args.push_back(block.columns());
        block.insert(block.get_by_position(position).get_nested(replace_null_data));
        DCHECK(!block.get_by_position(nested_args.back()).column->is_nullable());
    }

    RETURN_IF_ERROR(execute_without_low_cardinality_columns(context, block, nested_args, result,
                                                            block.rows(), dry_run));

    // Until the result is wrapped, the column stored at `result` is not compatible
    // with the type stored at `result`.
    block.get_by_position(result).column = wrap_in_nullable(
            block.get_by_position(result).column, block, args, result, input_rows_count);

    // Drop the temporary columns, last one first.
    for (auto it = nested_args.rbegin(); it != nested_args.rend(); ++it) {
        block.erase(*it);
    }

    *executed = true;
    return Status::OK();
}
233
234
/// Executes the function, trying the constant-argument default implementation first.
///
/// If default_implementation_for_constant_arguments() produced the result, returns
/// immediately; otherwise continues with _execute_skipped_constant_deal(). Any error
/// status is returned to the caller.
Status PreparedFunctionImpl::execute_without_low_cardinality_columns(
        FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result,
        size_t input_rows_count, bool dry_run) const {
    bool handled_as_constant = false;
    RETURN_IF_ERROR(default_implementation_for_constant_arguments(
            context, block, args, result, input_rows_count, dry_run, &handled_as_constant));

    if (!handled_as_constant) {
        return _execute_skipped_constant_deal(context, block, args, result, input_rows_count,
                                              dry_run);
    }
    return Status::OK();
}
247
248
/// Entry point for executing the prepared function on `block`.
///
/// Forwards every parameter unchanged to execute_without_low_cardinality_columns()
/// and returns its status.
Status PreparedFunctionImpl::execute(FunctionContext* context, Block& block,
                                     const ColumnNumbers& args, size_t result,
                                     size_t input_rows_count, bool dry_run) const {
    Status status = execute_without_low_cardinality_columns(context, block, args, result,
                                                            input_rows_count, dry_run);
    return status;
}
254
255
14.1k
void FunctionBuilderImpl::check_number_of_arguments(size_t number_of_arguments) const {
256
14.1k
    if (is_variadic()) {
257
4.29k
        return;
258
4.29k
    }
259
260
9.90k
    size_t expected_number_of_arguments = get_number_of_arguments();
261
262
9.90k
    CHECK_EQ(number_of_arguments, expected_number_of_arguments) << fmt::format(
263
0
            "Number of arguments for function {} doesn't match: passed {} , should be {}",
264
0
            get_name(), number_of_arguments, expected_number_of_arguments);
265
9.90k
}
266
267
/// Computes the return type for `arguments`.
///
/// After checking the argument count, two paths exist:
///  - the function uses the default implementation for nulls and at least one
///    argument type is nullable: get_return_type_impl() is called on the nested
///    columns of the arguments and its answer is wrapped with make_nullable();
///  - otherwise get_return_type_impl() is called on `arguments` directly.
DataTypePtr FunctionBuilderImpl::get_return_type_without_low_cardinality(
        const ColumnsWithTypeAndName& arguments) const {
    check_number_of_arguments(arguments.size());

    const bool unwrap_nullable_args = !arguments.empty() &&
                                      use_default_implementation_for_nulls() &&
                                      have_null_column(arguments);
    if (!unwrap_nullable_args) {
        return get_return_type_impl(arguments);
    }

    // Positions 0 .. N-1: every argument takes part in the unwrapping.
    ColumnNumbers positions(arguments.size());
    std::iota(positions.begin(), positions.end(), 0);

    auto [nested_block, _] = create_block_with_nested_columns(Block(arguments), positions, false);
    auto nested_return_type = get_return_type_impl(
            ColumnsWithTypeAndName(nested_block.begin(), nested_block.end()));
    return make_nullable(nested_return_type);
}
285
286
14.1k
/// Computes the return type of the function for `arguments`.
///
/// When use_default_implementation_for_low_cardinality_columns() is true, a copy of
/// the arguments is made in which every constant column is replaced by the outcome of
/// ColumnConst::remove_low_cardinality(); the type is then computed on that copy.
/// Otherwise the type is computed on `arguments` as given.
DataTypePtr FunctionBuilderImpl::get_return_type(const ColumnsWithTypeAndName& arguments) const {
    if (!use_default_implementation_for_low_cardinality_columns()) {
        return get_return_type_without_low_cardinality(arguments);
    }

    ColumnsWithTypeAndName stripped_arguments(arguments);
    for (ColumnWithTypeAndName& argument : stripped_arguments) {
        // Arguments without a column, or with a non-constant column, are left untouched.
        if (argument.column && is_column_const(*argument.column)) {
            argument.column =
                    assert_cast<const ColumnConst&>(*argument.column).remove_low_cardinality();
        }
    }

    return get_return_type_without_low_cardinality(stripped_arguments);
}
305
306
bool FunctionBuilderImpl::is_date_or_datetime_or_decimal(
307
2
        const DataTypePtr& return_type, const DataTypePtr& func_return_type) const {
308
2
    return (is_date_or_datetime(return_type->is_nullable()
309
2
                                        ? ((DataTypeNullable*)return_type.get())->get_nested_type()
310
2
                                        : return_type) &&
311
2
            is_date_or_datetime(
312
2
                    func_return_type->is_nullable()
313
2
                            ? ((DataTypeNullable*)func_return_type.get())->get_nested_type()
314
2
                            : func_return_type)) ||
315
2
           (is_date_v2_or_datetime_v2(
316
0
                    return_type->is_nullable()
317
0
                            ? ((DataTypeNullable*)return_type.get())->get_nested_type()
318
0
                            : return_type) &&
319
0
            is_date_v2_or_datetime_v2(
320
0
                    func_return_type->is_nullable()
321
0
                            ? ((DataTypeNullable*)func_return_type.get())->get_nested_type()
322
0
                            : func_return_type)) ||
323
           // For some date functions such as str_to_date(string, string), return_type will
324
           // be datetimev2 if users enable datev2 but get_return_type(arguments) will still
325
           // return datetime. We need keep backward compatibility here.
326
2
           (is_date_v2_or_datetime_v2(
327
0
                    return_type->is_nullable()
328
0
                            ? ((DataTypeNullable*)return_type.get())->get_nested_type()
329
0
                            : return_type) &&
330
0
            is_date_or_datetime(
331
0
                    func_return_type->is_nullable()
332
0
                            ? ((DataTypeNullable*)func_return_type.get())->get_nested_type()
333
0
                            : func_return_type)) ||
334
2
           (is_date_or_datetime(return_type->is_nullable()
335
0
                                        ? ((DataTypeNullable*)return_type.get())->get_nested_type()
336
0
                                        : return_type) &&
337
0
            is_date_v2_or_datetime_v2(
338
0
                    func_return_type->is_nullable()
339
0
                            ? ((DataTypeNullable*)func_return_type.get())->get_nested_type()
340
0
                            : func_return_type)) ||
341
2
           (is_decimal(return_type->is_nullable()
342
0
                               ? ((DataTypeNullable*)return_type.get())->get_nested_type()
343
0
                               : return_type) &&
344
0
            is_decimal(func_return_type->is_nullable()
345
0
                               ? ((DataTypeNullable*)func_return_type.get())->get_nested_type()
346
0
                               : func_return_type));
347
2
}
348
349
bool FunctionBuilderImpl::is_array_nested_type_date_or_datetime_or_decimal(
350
0
        const DataTypePtr& return_type, const DataTypePtr& func_return_type) const {
351
0
    auto return_type_ptr = return_type->is_nullable()
352
0
                                   ? ((DataTypeNullable*)return_type.get())->get_nested_type()
353
0
                                   : return_type;
354
0
    auto func_return_type_ptr =
355
0
            func_return_type->is_nullable()
356
0
                    ? ((DataTypeNullable*)func_return_type.get())->get_nested_type()
357
0
                    : func_return_type;
358
0
    if (!(is_array(return_type_ptr) && is_array(func_return_type_ptr))) {
359
0
        return false;
360
0
    }
361
0
    auto nested_nullable_return_type_ptr =
362
0
            (assert_cast<const DataTypeArray*>(return_type_ptr.get()))->get_nested_type();
363
0
    auto nested_nullable_func_return_type =
364
0
            (assert_cast<const DataTypeArray*>(func_return_type_ptr.get()))->get_nested_type();
365
    // There must be nullable inside array type.
366
0
    if (nested_nullable_return_type_ptr->is_nullable() &&
367
0
        nested_nullable_func_return_type->is_nullable()) {
368
0
        auto nested_return_type_ptr =
369
0
                ((DataTypeNullable*)(nested_nullable_return_type_ptr.get()))->get_nested_type();
370
0
        auto nested_func_return_type_ptr =
371
0
                ((DataTypeNullable*)(nested_nullable_func_return_type.get()))->get_nested_type();
372
0
        return is_date_or_datetime_or_decimal(nested_return_type_ptr, nested_func_return_type_ptr);
373
0
    }
374
0
    return false;
375
0
}
376
} // namespace doris::vectorized