Coverage Report

Created: 2025-06-12 01:25

/root/doris/be/src/vec/functions/function.cpp
Line
Count
Source (jump to first uncovered line)
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/IFunction.cpp
19
// and modified by Doris
20
21
#include "vec/functions/function.h"
22
23
#include <algorithm>
24
#include <memory>
25
#include <numeric>
26
27
#include "vec/aggregate_functions/aggregate_function.h"
28
#include "vec/columns/column.h"
29
#include "vec/columns/column_const.h"
30
#include "vec/columns/column_nullable.h"
31
#include "vec/columns/column_vector.h"
32
#include "vec/common/assert_cast.h"
33
#include "vec/core/field.h"
34
#include "vec/data_types/data_type_array.h"
35
#include "vec/data_types/data_type_nothing.h"
36
#include "vec/data_types/data_type_nullable.h"
37
#include "vec/functions/function_helpers.h"
38
#include "vec/utils/util.hpp"
39
40
namespace doris::vectorized {
41
42
ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const ColumnNumbers& args,
43
920k
                           uint32_t result, size_t input_rows_count) {
44
920k
    ColumnPtr result_null_map_column;
45
    /// If result is already nullable.
46
920k
    ColumnPtr src_not_nullable = src;
47
920k
    MutableColumnPtr mutable_result_null_map_column;
48
49
920k
    if (const auto* nullable = check_and_get_column<ColumnNullable>(*src)) {
50
562k
        src_not_nullable = nullable->get_nested_column_ptr();
51
562k
        result_null_map_column = nullable->get_null_map_column_ptr();
52
562k
    }
53
54
1.84M
    for (const auto& arg : args) {
55
1.84M
        const ColumnWithTypeAndName& elem = block.get_by_position(arg);
56
1.84M
        if (!elem.type->is_nullable() || is_column_const(*elem.column)) {
57
921k
            continue;
58
921k
        }
59
60
925k
        if (const auto* nullable = assert_cast<const ColumnNullable*>(elem.column.get());
61
925k
            nullable->has_null()) {
62
234
            const ColumnPtr& null_map_column = nullable->get_null_map_column_ptr();
63
234
            if (!result_null_map_column) { // NOLINT(bugprone-use-after-move)
64
108
                result_null_map_column = null_map_column->clone_resized(input_rows_count);
65
108
                continue;
66
108
            }
67
68
126
            if (!mutable_result_null_map_column) {
69
103
                mutable_result_null_map_column =
70
103
                        std::move(result_null_map_column)->assume_mutable();
71
103
            }
72
73
126
            NullMap& result_null_map =
74
126
                    assert_cast<ColumnUInt8&>(*mutable_result_null_map_column).get_data();
75
126
            const NullMap& src_null_map =
76
126
                    assert_cast<const ColumnUInt8&>(*null_map_column).get_data();
77
78
126
            VectorizedUtils::update_null_map(result_null_map, src_null_map);
79
126
        }
80
925k
    }
81
82
920k
    if (!result_null_map_column) {
83
358k
        if (is_column_const(*src)) {
84
0
            return ColumnConst::create(
85
0
                    make_nullable(assert_cast<const ColumnConst&>(*src).get_data_column_ptr(),
86
0
                                  false),
87
0
                    input_rows_count);
88
0
        }
89
358k
        return ColumnNullable::create(src, ColumnUInt8::create(input_rows_count, 0));
90
358k
    }
91
92
562k
    return ColumnNullable::create(src_not_nullable, result_null_map_column);
93
920k
}
94
95
206k
bool have_null_column(const Block& block, const ColumnNumbers& args) {
96
217k
    return std::ranges::any_of(args, [&block](const auto& elem) {
97
217k
        return block.get_by_position(elem).type->is_nullable();
98
217k
    });
99
206k
}
100
101
202k
bool have_null_column(const ColumnsWithTypeAndName& args) {
102
202k
    return std::ranges::any_of(args, [](const auto& elem) { return elem.type->is_nullable(); });
103
202k
}
104
105
inline Status PreparedFunctionImpl::_execute_skipped_constant_deal(
106
        FunctionContext* context, Block& block, const ColumnNumbers& args, uint32_t result,
107
1.12M
        size_t input_rows_count, bool dry_run) const {
108
1.12M
    bool executed = false;
109
1.12M
    RETURN_IF_ERROR(default_implementation_for_nulls(context, block, args, result, input_rows_count,
110
1.12M
                                                     dry_run, &executed));
111
1.12M
    if (executed) {
112
10.2k
        return Status::OK();
113
10.2k
    }
114
115
1.11M
    if (dry_run) {
116
0
        return execute_impl_dry_run(context, block, args, result, input_rows_count);
117
1.11M
    } else {
118
1.11M
        return execute_impl(context, block, args, result, input_rows_count);
119
1.11M
    }
120
1.11M
}
121
122
Status PreparedFunctionImpl::default_implementation_for_constant_arguments(
123
        FunctionContext* context, Block& block, const ColumnNumbers& args, uint32_t result,
124
1.12M
        size_t input_rows_count, bool dry_run, bool* executed) const {
125
1.12M
    *executed = false;
126
1.12M
    ColumnNumbers args_expect_const = get_arguments_that_are_always_constant();
127
128
    // Check that these arguments are really constant.
129
1.12M
    for (auto arg_num : args_expect_const) {
130
1.10M
        if (arg_num < args.size() &&
131
1.10M
            !is_column_const(*block.get_by_position(args[arg_num]).column)) {
132
0
            return Status::InvalidArgument("Argument at index {} for function {} must be constant",
133
0
                                           arg_num, get_name());
134
0
        }
135
1.10M
    }
136
137
1.12M
    if (args.empty() || !use_default_implementation_for_constants() ||
138
1.12M
        !VectorizedUtils::all_arguments_are_constant(block, args)) {
139
1.12M
        return Status::OK();
140
1.12M
    }
141
142
    // now all columns are const.
143
2.17k
    Block temporary_block;
144
145
2.17k
    size_t arguments_size = args.size();
146
7.30k
    for (size_t arg_num = 0; arg_num < arguments_size; ++arg_num) {
147
5.12k
        const ColumnWithTypeAndName& column = block.get_by_position(args[arg_num]);
148
        // Columns in const_list --> column_const,    others --> nested_column
149
        // that's because some functions suppose some specific columns are always constant.
150
        // If we unpack it, there will be unnecessary cost of virtual judge.
151
5.12k
        if (args_expect_const.end() !=
152
5.12k
            std::find(args_expect_const.begin(), args_expect_const.end(), arg_num)) {
153
0
            temporary_block.insert({column.column, column.type, column.name});
154
5.12k
        } else {
155
5.12k
            temporary_block.insert(
156
5.12k
                    {assert_cast<const ColumnConst*>(column.column.get())->get_data_column_ptr(),
157
5.12k
                     column.type, column.name});
158
5.12k
        }
159
5.12k
    }
160
161
2.17k
    temporary_block.insert(block.get_by_position(result));
162
163
2.17k
    ColumnNumbers temporary_argument_numbers(arguments_size);
164
7.30k
    for (size_t i = 0; i < arguments_size; ++i) {
165
5.12k
        temporary_argument_numbers[i] = i;
166
5.12k
    }
167
168
2.17k
    RETURN_IF_ERROR(_execute_skipped_constant_deal(context, temporary_block,
169
2.17k
                                                   temporary_argument_numbers, arguments_size,
170
2.17k
                                                   temporary_block.rows(), dry_run));
171
172
2.17k
    ColumnPtr result_column;
173
    /// extremely rare case, when we have function with completely const arguments
174
    /// but some of them are produced by a non-deterministic function
175
2.17k
    if (temporary_block.get_by_position(arguments_size).column->size() > 1) {
176
0
        result_column = temporary_block.get_by_position(arguments_size).column->clone_resized(1);
177
2.17k
    } else {
178
2.17k
        result_column = temporary_block.get_by_position(arguments_size).column;
179
2.17k
    }
180
    // We should handle the case where the result column is also a ColumnConst.
181
2.17k
    block.get_by_position(result).column = ColumnConst::create(result_column, input_rows_count);
182
2.17k
    *executed = true;
183
2.17k
    return Status::OK();
184
2.17k
}
185
186
Status PreparedFunctionImpl::default_implementation_for_nulls(
187
        FunctionContext* context, Block& block, const ColumnNumbers& args, uint32_t result,
188
1.12M
        size_t input_rows_count, bool dry_run, bool* executed) const {
189
1.12M
    *executed = false;
190
1.12M
    if (args.empty() || !use_default_implementation_for_nulls()) {
191
915k
        return Status::OK();
192
915k
    }
193
194
235k
    if (std::ranges::any_of(args, [&block](const auto& elem) {
195
235k
            return block.get_by_position(elem).column->only_null();
196
235k
        })) {
197
3.29k
        block.get_by_position(result).column =
198
3.29k
                block.get_by_position(result).type->create_column_const(input_rows_count, Field());
199
3.29k
        *executed = true;
200
3.29k
        return Status::OK();
201
3.29k
    }
202
203
206k
    if (have_null_column(block, args)) {
204
6.90k
        bool need_to_default = need_replace_null_data_to_default();
205
6.90k
        if (context) {
206
6.90k
            need_to_default &= context->check_overflow_for_decimal();
207
6.90k
        }
208
        // extract nested column from nulls
209
6.90k
        ColumnNumbers new_args;
210
18.6k
        for (auto arg : args) {
211
18.6k
            new_args.push_back(block.columns());
212
18.6k
            block.insert(block.get_by_position(arg).get_nested(need_to_default));
213
18.6k
            DCHECK(!block.get_by_position(new_args.back()).column->is_nullable());
214
18.6k
        }
215
6.90k
        RETURN_IF_ERROR(execute_without_low_cardinality_columns(context, block, new_args, result,
216
6.90k
                                                                block.rows(), dry_run));
217
        // After running on the nested columns, wrap the result in nullable. Before this, block.get_by_position(result).type
218
        // is not compatible with get_by_position(result).column
219
6.90k
        block.get_by_position(result).column = wrap_in_nullable(
220
6.90k
                block.get_by_position(result).column, block, args, result, input_rows_count);
221
222
25.5k
        while (!new_args.empty()) {
223
18.6k
            block.erase(new_args.back());
224
18.6k
            new_args.pop_back();
225
18.6k
        }
226
6.90k
        *executed = true;
227
6.90k
        return Status::OK();
228
6.90k
    }
229
199k
    return Status::OK();
230
206k
}
231
232
Status PreparedFunctionImpl::execute_without_low_cardinality_columns(
233
        FunctionContext* context, Block& block, const ColumnNumbers& args, uint32_t result,
234
1.12M
        size_t input_rows_count, bool dry_run) const {
235
1.12M
    bool executed = false;
236
237
1.12M
    RETURN_IF_ERROR(default_implementation_for_constant_arguments(
238
1.12M
            context, block, args, result, input_rows_count, dry_run, &executed));
239
1.12M
    if (executed) {
240
2.17k
        return Status::OK();
241
2.17k
    }
242
243
1.12M
    return _execute_skipped_constant_deal(context, block, args, result, input_rows_count, dry_run);
244
1.12M
}
245
246
Status PreparedFunctionImpl::execute(FunctionContext* context, Block& block,
247
                                     const ColumnNumbers& args, uint32_t result,
248
1.11M
                                     size_t input_rows_count, bool dry_run) const {
249
1.11M
    return execute_without_low_cardinality_columns(context, block, args, result, input_rows_count,
250
1.11M
                                                   dry_run);
251
1.11M
}
252
253
1.11M
void FunctionBuilderImpl::check_number_of_arguments(size_t number_of_arguments) const {
254
1.11M
    if (is_variadic()) {
255
193k
        return;
256
193k
    }
257
258
924k
    size_t expected_number_of_arguments = get_number_of_arguments();
259
260
924k
    CHECK_EQ(number_of_arguments, expected_number_of_arguments) << fmt::format(
261
0
            "Number of arguments for function {} doesn't match: passed {} , should be {}",
262
0
            get_name(), number_of_arguments, expected_number_of_arguments);
263
924k
}
264
265
DataTypePtr FunctionBuilderImpl::get_return_type_without_low_cardinality(
266
1.11M
        const ColumnsWithTypeAndName& arguments) const {
267
1.11M
    check_number_of_arguments(arguments.size());
268
269
1.11M
    if (!arguments.empty() && use_default_implementation_for_nulls()) {
270
202k
        if (have_null_column(arguments)) {
271
10.2k
            ColumnNumbers numbers(arguments.size());
272
10.2k
            std::iota(numbers.begin(), numbers.end(), 0);
273
10.2k
            auto [nested_block, _] =
274
10.2k
                    create_block_with_nested_columns(Block(arguments), numbers, false);
275
10.2k
            auto return_type = get_return_type_impl(
276
10.2k
                    ColumnsWithTypeAndName(nested_block.begin(), nested_block.end()));
277
10.2k
            return make_nullable(return_type);
278
10.2k
        }
279
202k
    }
280
281
1.10M
    return get_return_type_impl(arguments);
282
1.11M
}
283
284
1.11M
DataTypePtr FunctionBuilderImpl::get_return_type(const ColumnsWithTypeAndName& arguments) const {
285
1.11M
    if (use_default_implementation_for_low_cardinality_columns()) {
286
203k
        ColumnsWithTypeAndName args_without_low_cardinality(arguments);
287
203k
        auto type_without_low_cardinality =
288
203k
                get_return_type_without_low_cardinality(args_without_low_cardinality);
289
290
203k
        return type_without_low_cardinality;
291
203k
    }
292
293
913k
    return get_return_type_without_low_cardinality(arguments);
294
1.11M
}
295
296
bool FunctionBuilderImpl::is_date_or_datetime_or_decimal(
297
670k
        const DataTypePtr& return_type, const DataTypePtr& func_return_type) const {
298
670k
    return (is_date_or_datetime(return_type->get_primitive_type()) &&
299
670k
            is_date_or_datetime(func_return_type->get_primitive_type())) ||
300
670k
           (is_date_v2_or_datetime_v2(return_type->get_primitive_type()) &&
301
670k
            is_date_v2_or_datetime_v2(func_return_type->get_primitive_type())) ||
302
           // For some date functions such as str_to_date(string, string), return_type will
303
           // be datetimev2 if users enable datev2 but get_return_type(arguments) will still
304
           // return datetime. We need keep backward compatibility here.
305
670k
           (is_date_v2_or_datetime_v2(return_type->get_primitive_type()) &&
306
670k
            is_date_or_datetime(func_return_type->get_primitive_type())) ||
307
670k
           (is_date_or_datetime(return_type->get_primitive_type()) &&
308
670k
            is_date_v2_or_datetime_v2(func_return_type->get_primitive_type())) ||
309
670k
           (is_decimal(return_type->get_primitive_type()) &&
310
670k
            is_decimal(func_return_type->get_primitive_type()));
311
670k
}
312
313
bool FunctionBuilderImpl::is_array_nested_type_date_or_datetime_or_decimal(
314
0
        const DataTypePtr& return_type, const DataTypePtr& func_return_type) const {
315
0
    auto return_type_ptr = return_type->is_nullable()
316
0
                                   ? ((DataTypeNullable*)return_type.get())->get_nested_type()
317
0
                                   : return_type;
318
0
    auto func_return_type_ptr =
319
0
            func_return_type->is_nullable()
320
0
                    ? ((DataTypeNullable*)func_return_type.get())->get_nested_type()
321
0
                    : func_return_type;
322
0
    if (!(return_type_ptr->get_primitive_type() == TYPE_ARRAY &&
323
0
          func_return_type_ptr->get_primitive_type() == TYPE_ARRAY)) {
324
0
        return false;
325
0
    }
326
0
    auto nested_nullable_return_type_ptr =
327
0
            (assert_cast<const DataTypeArray*>(return_type_ptr.get()))->get_nested_type();
328
0
    auto nested_nullable_func_return_type =
329
0
            (assert_cast<const DataTypeArray*>(func_return_type_ptr.get()))->get_nested_type();
330
    // There must be nullable inside array type.
331
0
    if (nested_nullable_return_type_ptr->is_nullable() &&
332
0
        nested_nullable_func_return_type->is_nullable()) {
333
0
        auto nested_return_type_ptr =
334
0
                ((DataTypeNullable*)(nested_nullable_return_type_ptr.get()))->get_nested_type();
335
0
        auto nested_func_return_type_ptr =
336
0
                ((DataTypeNullable*)(nested_nullable_func_return_type.get()))->get_nested_type();
337
0
        return is_date_or_datetime_or_decimal(nested_return_type_ptr, nested_func_return_type_ptr);
338
0
    }
339
0
    return false;
340
0
}
341
} // namespace doris::vectorized