Coverage Report

Created: 2026-03-12 17:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_collection_in.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
19
#pragma once
20
21
#include <glog/logging.h>
22
#include <stddef.h>
23
24
#include <algorithm>
25
#include <memory>
26
#include <unordered_set>
27
#include <utility>
28
#include <vector>
29
30
#include "common/status.h"
31
#include "core/block/block.h"
32
#include "core/column/column.h"
33
#include "core/column/column_const.h"
34
#include "core/column/column_nullable.h"
35
#include "core/column/column_struct.h"
36
#include "core/column/column_vector.h"
37
#include "core/data_type/data_type_factory.hpp"
38
#include "core/data_type/data_type_nullable.h"
39
#include "core/data_type/data_type_number.h"
40
#include "exprs/function/function.h"
41
42
namespace doris {
43
#include "common/compile_check_begin.h"
44
struct ColumnRowRef {
45
    ENABLE_FACTORY_CREATOR(ColumnRowRef);
46
    ColumnPtr column;
47
    size_t row_idx;
48
49
    // equals when call set insert, this operator will be used
50
6
    bool operator==(const ColumnRowRef& other) const {
51
6
        return column->compare_at(row_idx, other.row_idx, *other.column, 0) == 0;
52
6
    }
53
    // compare
54
0
    bool operator<(const ColumnRowRef& other) const {
55
0
        return column->compare_at(row_idx, other.row_idx, *other.column, 0) < 0;
56
0
    }
57
58
    // when call set find, will use hash to find
59
43
    size_t operator()(const ColumnRowRef& a) const {
60
43
        uint32_t hash_val = 0;
61
43
        a.column->update_crc_with_value(a.row_idx, a.row_idx + 1, hash_val, nullptr);
62
43
        return hash_val;
63
43
    }
64
};
65
66
struct CollectionInState {
67
    ENABLE_FACTORY_CREATOR(CollectionInState)
68
    std::unordered_set<ColumnRowRef, ColumnRowRef> args_set;
69
    bool null_in_set = false;
70
};
71
72
template <bool negative>
73
class FunctionCollectionIn : public IFunction {
74
public:
75
    static constexpr auto name = negative ? "collection_not_in" : "collection_in";
76
77
30
    static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); }
_ZN5doris20FunctionCollectionInILb0EE6createEv
Line
Count
Source
77
18
    static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); }
_ZN5doris20FunctionCollectionInILb1EE6createEv
Line
Count
Source
77
12
    static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); }
78
79
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb1EE8get_nameB5cxx11Ev
80
81
14
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionCollectionInILb0EE11is_variadicEv
Line
Count
Source
81
10
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionCollectionInILb1EE11is_variadicEv
Line
Count
Source
81
4
    bool is_variadic() const override { return true; }
82
83
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb1EE23get_number_of_argumentsEv
84
85
12
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
86
26
        for (const auto& arg : args) {
87
26
            if (arg->is_nullable()) {
88
8
                return make_nullable(std::make_shared<DataTypeUInt8>());
89
8
            }
90
26
        }
91
4
        return std::make_shared<DataTypeUInt8>();
92
12
    }
_ZNK5doris20FunctionCollectionInILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
85
9
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
86
16
        for (const auto& arg : args) {
87
16
            if (arg->is_nullable()) {
88
7
                return make_nullable(std::make_shared<DataTypeUInt8>());
89
7
            }
90
16
        }
91
2
        return std::make_shared<DataTypeUInt8>();
92
9
    }
_ZNK5doris20FunctionCollectionInILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
85
3
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
86
10
        for (const auto& arg : args) {
87
10
            if (arg->is_nullable()) {
88
1
                return make_nullable(std::make_shared<DataTypeUInt8>());
89
1
            }
90
10
        }
91
2
        return std::make_shared<DataTypeUInt8>();
92
3
    }
93
94
30
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionCollectionInILb0EE36use_default_implementation_for_nullsEv
Line
Count
Source
94
24
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionCollectionInILb1EE36use_default_implementation_for_nullsEv
Line
Count
Source
94
6
    bool use_default_implementation_for_nulls() const override { return false; }
95
96
    // make data in context into a set
97
96
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
98
96
        if (scope == FunctionContext::THREAD_LOCAL) {
99
84
            return Status::OK();
100
84
        }
101
12
        int num_args = context->get_num_args();
102
12
        DCHECK(num_args >= 1);
103
104
12
        std::shared_ptr<CollectionInState> state = std::make_shared<CollectionInState>();
105
12
        context->set_function_state(scope, state);
106
107
12
        DataTypePtr args_type = remove_nullable(context->get_arg_type(0));
108
12
        MutableColumnPtr args_column_ptr = args_type->create_column();
109
110
42
        for (int i = 1; i < num_args; i++) {
111
            // FE should make element type consistent and
112
            // equalize the length of the elements in struct
113
30
            const auto& const_column_ptr = context->get_constant_col(i);
114
            // Types like struct, array, and map only support constant expressions.
115
30
            DCHECK(const_column_ptr != nullptr);
116
30
            const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr);
117
30
            if (col->is_nullable()) {
118
5
                const auto* null_col = check_and_get_column<ColumnNullable>(col.get());
119
5
                if (null_col->has_null()) {
120
5
                    state->null_in_set = true;
121
5
                } else {
122
0
                    args_column_ptr->insert_from(null_col->get_nested_column(), 0);
123
0
                }
124
25
            } else {
125
25
                args_column_ptr->insert_from(*col, 0);
126
25
            }
127
30
        }
128
12
        ColumnPtr column_ptr = std::move(args_column_ptr);
129
        // make collection ref into set
130
12
        auto col_size = column_ptr->size();
131
37
        for (size_t i = 0; i < col_size; i++) {
132
25
            state->args_set.insert({column_ptr, i});
133
25
        }
134
135
12
        return Status::OK();
136
96
    }
_ZN5doris20FunctionCollectionInILb0EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE
Line
Count
Source
97
87
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
98
87
        if (scope == FunctionContext::THREAD_LOCAL) {
99
78
            return Status::OK();
100
78
        }
101
9
        int num_args = context->get_num_args();
102
9
        DCHECK(num_args >= 1);
103
104
9
        std::shared_ptr<CollectionInState> state = std::make_shared<CollectionInState>();
105
9
        context->set_function_state(scope, state);
106
107
9
        DataTypePtr args_type = remove_nullable(context->get_arg_type(0));
108
9
        MutableColumnPtr args_column_ptr = args_type->create_column();
109
110
32
        for (int i = 1; i < num_args; i++) {
111
            // FE should make element type consistent and
112
            // equalize the length of the elements in struct
113
23
            const auto& const_column_ptr = context->get_constant_col(i);
114
            // Types like struct, array, and map only support constant expressions.
115
23
            DCHECK(const_column_ptr != nullptr);
116
23
            const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr);
117
23
            if (col->is_nullable()) {
118
4
                const auto* null_col = check_and_get_column<ColumnNullable>(col.get());
119
4
                if (null_col->has_null()) {
120
4
                    state->null_in_set = true;
121
4
                } else {
122
0
                    args_column_ptr->insert_from(null_col->get_nested_column(), 0);
123
0
                }
124
19
            } else {
125
19
                args_column_ptr->insert_from(*col, 0);
126
19
            }
127
23
        }
128
9
        ColumnPtr column_ptr = std::move(args_column_ptr);
129
        // make collection ref into set
130
9
        auto col_size = column_ptr->size();
131
28
        for (size_t i = 0; i < col_size; i++) {
132
19
            state->args_set.insert({column_ptr, i});
133
19
        }
134
135
9
        return Status::OK();
136
87
    }
_ZN5doris20FunctionCollectionInILb1EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE
Line
Count
Source
97
9
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
98
9
        if (scope == FunctionContext::THREAD_LOCAL) {
99
6
            return Status::OK();
100
6
        }
101
3
        int num_args = context->get_num_args();
102
3
        DCHECK(num_args >= 1);
103
104
3
        std::shared_ptr<CollectionInState> state = std::make_shared<CollectionInState>();
105
3
        context->set_function_state(scope, state);
106
107
3
        DataTypePtr args_type = remove_nullable(context->get_arg_type(0));
108
3
        MutableColumnPtr args_column_ptr = args_type->create_column();
109
110
10
        for (int i = 1; i < num_args; i++) {
111
            // FE should make element type consistent and
112
            // equalize the length of the elements in struct
113
7
            const auto& const_column_ptr = context->get_constant_col(i);
114
            // Types like struct, array, and map only support constant expressions.
115
7
            DCHECK(const_column_ptr != nullptr);
116
7
            const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr);
117
7
            if (col->is_nullable()) {
118
1
                const auto* null_col = check_and_get_column<ColumnNullable>(col.get());
119
1
                if (null_col->has_null()) {
120
1
                    state->null_in_set = true;
121
1
                } else {
122
0
                    args_column_ptr->insert_from(null_col->get_nested_column(), 0);
123
0
                }
124
6
            } else {
125
6
                args_column_ptr->insert_from(*col, 0);
126
6
            }
127
7
        }
128
3
        ColumnPtr column_ptr = std::move(args_column_ptr);
129
        // make collection ref into set
130
3
        auto col_size = column_ptr->size();
131
9
        for (size_t i = 0; i < col_size; i++) {
132
6
            state->args_set.insert({column_ptr, i});
133
6
        }
134
135
3
        return Status::OK();
136
9
    }
137
138
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
139
18
                        uint32_t result, size_t input_rows_count) const override {
140
18
        auto in_state = reinterpret_cast<CollectionInState*>(
141
18
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
142
18
        if (!in_state) {
143
0
            return Status::RuntimeError("function context for function '{}' must have Set;",
144
0
                                        get_name());
145
0
        }
146
18
        const auto& args_set = in_state->args_set;
147
18
        const bool null_in_set = in_state->null_in_set;
148
18
        auto res = ColumnUInt8::create();
149
18
        ColumnUInt8::Container& vec_res = res->get_data();
150
18
        vec_res.resize(input_rows_count);
151
152
18
        ColumnUInt8::MutablePtr col_null_map_to;
153
18
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
154
18
        auto& vec_null_map_to = col_null_map_to->get_data();
155
156
18
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
157
18
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
158
18
        auto materialized_column_not_null = materialized_column;
159
18
        if (materialized_column_not_null->is_nullable()) {
160
12
            materialized_column_not_null = assert_cast<ColumnPtr>(
161
12
                    check_and_get_column<ColumnNullable>(materialized_column_not_null.get())
162
12
                            ->get_nested_column_ptr());
163
12
        }
164
165
36
        for (size_t i = 0; i < input_rows_count; ++i) {
166
18
            bool find = args_set.find({materialized_column_not_null, i}) != args_set.end();
167
168
18
            if constexpr (negative) {
169
3
                vec_res[i] = !find;
170
15
            } else {
171
15
                vec_res[i] = find;
172
15
            }
173
174
18
            if (null_in_set) {
175
8
                vec_null_map_to[i] = negative == vec_res[i];
176
10
            } else {
177
10
                vec_null_map_to[i] = false;
178
10
            }
179
18
        }
180
181
18
        if (block.get_by_position(result).type->is_nullable()) {
182
14
            block.replace_by_position(
183
14
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
184
14
        } else {
185
4
            block.replace_by_position(result, std::move(res));
186
4
        }
187
18
        return Status::OK();
188
18
    }
_ZNK5doris20FunctionCollectionInILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
139
15
                        uint32_t result, size_t input_rows_count) const override {
140
15
        auto in_state = reinterpret_cast<CollectionInState*>(
141
15
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
142
15
        if (!in_state) {
143
0
            return Status::RuntimeError("function context for function '{}' must have Set;",
144
0
                                        get_name());
145
0
        }
146
15
        const auto& args_set = in_state->args_set;
147
15
        const bool null_in_set = in_state->null_in_set;
148
15
        auto res = ColumnUInt8::create();
149
15
        ColumnUInt8::Container& vec_res = res->get_data();
150
15
        vec_res.resize(input_rows_count);
151
152
15
        ColumnUInt8::MutablePtr col_null_map_to;
153
15
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
154
15
        auto& vec_null_map_to = col_null_map_to->get_data();
155
156
15
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
157
15
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
158
15
        auto materialized_column_not_null = materialized_column;
159
15
        if (materialized_column_not_null->is_nullable()) {
160
12
            materialized_column_not_null = assert_cast<ColumnPtr>(
161
12
                    check_and_get_column<ColumnNullable>(materialized_column_not_null.get())
162
12
                            ->get_nested_column_ptr());
163
12
        }
164
165
30
        for (size_t i = 0; i < input_rows_count; ++i) {
166
15
            bool find = args_set.find({materialized_column_not_null, i}) != args_set.end();
167
168
            if constexpr (negative) {
169
                vec_res[i] = !find;
170
15
            } else {
171
15
                vec_res[i] = find;
172
15
            }
173
174
15
            if (null_in_set) {
175
7
                vec_null_map_to[i] = negative == vec_res[i];
176
8
            } else {
177
8
                vec_null_map_to[i] = false;
178
8
            }
179
15
        }
180
181
15
        if (block.get_by_position(result).type->is_nullable()) {
182
13
            block.replace_by_position(
183
13
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
184
13
        } else {
185
2
            block.replace_by_position(result, std::move(res));
186
2
        }
187
15
        return Status::OK();
188
15
    }
_ZNK5doris20FunctionCollectionInILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
139
3
                        uint32_t result, size_t input_rows_count) const override {
140
3
        auto in_state = reinterpret_cast<CollectionInState*>(
141
3
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
142
3
        if (!in_state) {
143
0
            return Status::RuntimeError("function context for function '{}' must have Set;",
144
0
                                        get_name());
145
0
        }
146
3
        const auto& args_set = in_state->args_set;
147
3
        const bool null_in_set = in_state->null_in_set;
148
3
        auto res = ColumnUInt8::create();
149
3
        ColumnUInt8::Container& vec_res = res->get_data();
150
3
        vec_res.resize(input_rows_count);
151
152
3
        ColumnUInt8::MutablePtr col_null_map_to;
153
3
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
154
3
        auto& vec_null_map_to = col_null_map_to->get_data();
155
156
3
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
157
3
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
158
3
        auto materialized_column_not_null = materialized_column;
159
3
        if (materialized_column_not_null->is_nullable()) {
160
0
            materialized_column_not_null = assert_cast<ColumnPtr>(
161
0
                    check_and_get_column<ColumnNullable>(materialized_column_not_null.get())
162
0
                            ->get_nested_column_ptr());
163
0
        }
164
165
6
        for (size_t i = 0; i < input_rows_count; ++i) {
166
3
            bool find = args_set.find({materialized_column_not_null, i}) != args_set.end();
167
168
3
            if constexpr (negative) {
169
3
                vec_res[i] = !find;
170
            } else {
171
                vec_res[i] = find;
172
            }
173
174
3
            if (null_in_set) {
175
1
                vec_null_map_to[i] = negative == vec_res[i];
176
2
            } else {
177
2
                vec_null_map_to[i] = false;
178
2
            }
179
3
        }
180
181
3
        if (block.get_by_position(result).type->is_nullable()) {
182
1
            block.replace_by_position(
183
1
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
184
2
        } else {
185
2
            block.replace_by_position(result, std::move(res));
186
2
        }
187
3
        return Status::OK();
188
3
    }
189
};
190
191
} // namespace doris
192
193
#include "common/compile_check_end.h"