Coverage Report

Created: 2026-04-13 08:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_collection_in.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
19
#pragma once
20
21
#include <glog/logging.h>
22
#include <stddef.h>
23
24
#include <algorithm>
25
#include <memory>
26
#include <unordered_set>
27
#include <utility>
28
#include <vector>
29
30
#include "common/status.h"
31
#include "core/block/block.h"
32
#include "core/column/column.h"
33
#include "core/column/column_const.h"
34
#include "core/column/column_nullable.h"
35
#include "core/column/column_struct.h"
36
#include "core/column/column_vector.h"
37
#include "core/data_type/data_type_factory.hpp"
38
#include "core/data_type/data_type_nullable.h"
39
#include "core/data_type/data_type_number.h"
40
#include "exprs/function/function.h"
41
42
namespace doris {
43
struct ColumnRowRef {
44
    ENABLE_FACTORY_CREATOR(ColumnRowRef);
45
    ColumnPtr column;
46
    size_t row_idx;
47
48
    // equals when call set insert, this operator will be used
49
6
    bool operator==(const ColumnRowRef& other) const {
50
6
        return column->compare_at(row_idx, other.row_idx, *other.column, 0) == 0;
51
6
    }
52
    // compare
53
0
    bool operator<(const ColumnRowRef& other) const {
54
0
        return column->compare_at(row_idx, other.row_idx, *other.column, 0) < 0;
55
0
    }
56
57
    // when call set find, will use hash to find
58
43
    size_t operator()(const ColumnRowRef& a) const {
59
43
        uint32_t hash_val = 0;
60
43
        a.column->update_crc_with_value(a.row_idx, a.row_idx + 1, hash_val, nullptr);
61
43
        return hash_val;
62
43
    }
63
};
64
65
struct CollectionInState {
66
    ENABLE_FACTORY_CREATOR(CollectionInState)
67
    std::unordered_set<ColumnRowRef, ColumnRowRef> args_set;
68
    bool null_in_set = false;
69
};
70
71
template <bool negative>
72
class FunctionCollectionIn : public IFunction {
73
public:
74
    static constexpr auto name = negative ? "collection_not_in" : "collection_in";
75
76
30
    static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); }
_ZN5doris20FunctionCollectionInILb0EE6createEv
Line
Count
Source
76
18
    static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); }
_ZN5doris20FunctionCollectionInILb1EE6createEv
Line
Count
Source
76
12
    static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); }
77
78
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb1EE8get_nameB5cxx11Ev
79
80
14
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionCollectionInILb0EE11is_variadicEv
Line
Count
Source
80
10
    bool is_variadic() const override { return true; }
_ZNK5doris20FunctionCollectionInILb1EE11is_variadicEv
Line
Count
Source
80
4
    bool is_variadic() const override { return true; }
81
82
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb1EE23get_number_of_argumentsEv
83
84
12
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
85
26
        for (const auto& arg : args) {
86
26
            if (arg->is_nullable()) {
87
8
                return make_nullable(std::make_shared<DataTypeUInt8>());
88
8
            }
89
26
        }
90
4
        return std::make_shared<DataTypeUInt8>();
91
12
    }
_ZNK5doris20FunctionCollectionInILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
84
9
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
85
16
        for (const auto& arg : args) {
86
16
            if (arg->is_nullable()) {
87
7
                return make_nullable(std::make_shared<DataTypeUInt8>());
88
7
            }
89
16
        }
90
2
        return std::make_shared<DataTypeUInt8>();
91
9
    }
_ZNK5doris20FunctionCollectionInILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
84
3
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
85
10
        for (const auto& arg : args) {
86
10
            if (arg->is_nullable()) {
87
1
                return make_nullable(std::make_shared<DataTypeUInt8>());
88
1
            }
89
10
        }
90
2
        return std::make_shared<DataTypeUInt8>();
91
3
    }
92
93
30
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionCollectionInILb0EE36use_default_implementation_for_nullsEv
Line
Count
Source
93
24
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris20FunctionCollectionInILb1EE36use_default_implementation_for_nullsEv
Line
Count
Source
93
6
    bool use_default_implementation_for_nulls() const override { return false; }
94
95
    // make data in context into a set
96
60
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
97
60
        if (scope == FunctionContext::THREAD_LOCAL) {
98
48
            return Status::OK();
99
48
        }
100
12
        int num_args = context->get_num_args();
101
12
        DCHECK(num_args >= 1);
102
103
12
        std::shared_ptr<CollectionInState> state = std::make_shared<CollectionInState>();
104
12
        context->set_function_state(scope, state);
105
106
12
        DataTypePtr args_type = remove_nullable(context->get_arg_type(0));
107
12
        MutableColumnPtr args_column_ptr = args_type->create_column();
108
109
42
        for (int i = 1; i < num_args; i++) {
110
            // FE should make element type consistent and
111
            // equalize the length of the elements in struct
112
30
            const auto& const_column_ptr = context->get_constant_col(i);
113
            // Types like struct, array, and map only support constant expressions.
114
30
            DCHECK(const_column_ptr != nullptr);
115
30
            const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr);
116
30
            if (col->is_nullable()) {
117
5
                const auto* null_col = check_and_get_column<ColumnNullable>(col.get());
118
5
                if (null_col->has_null()) {
119
5
                    state->null_in_set = true;
120
5
                } else {
121
0
                    args_column_ptr->insert_from(null_col->get_nested_column(), 0);
122
0
                }
123
25
            } else {
124
25
                args_column_ptr->insert_from(*col, 0);
125
25
            }
126
30
        }
127
12
        ColumnPtr column_ptr = std::move(args_column_ptr);
128
        // make collection ref into set
129
12
        auto col_size = column_ptr->size();
130
37
        for (size_t i = 0; i < col_size; i++) {
131
25
            state->args_set.insert({column_ptr, i});
132
25
        }
133
134
12
        return Status::OK();
135
60
    }
_ZN5doris20FunctionCollectionInILb0EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE
Line
Count
Source
96
51
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
97
51
        if (scope == FunctionContext::THREAD_LOCAL) {
98
42
            return Status::OK();
99
42
        }
100
9
        int num_args = context->get_num_args();
101
9
        DCHECK(num_args >= 1);
102
103
9
        std::shared_ptr<CollectionInState> state = std::make_shared<CollectionInState>();
104
9
        context->set_function_state(scope, state);
105
106
9
        DataTypePtr args_type = remove_nullable(context->get_arg_type(0));
107
9
        MutableColumnPtr args_column_ptr = args_type->create_column();
108
109
32
        for (int i = 1; i < num_args; i++) {
110
            // FE should make element type consistent and
111
            // equalize the length of the elements in struct
112
23
            const auto& const_column_ptr = context->get_constant_col(i);
113
            // Types like struct, array, and map only support constant expressions.
114
23
            DCHECK(const_column_ptr != nullptr);
115
23
            const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr);
116
23
            if (col->is_nullable()) {
117
4
                const auto* null_col = check_and_get_column<ColumnNullable>(col.get());
118
4
                if (null_col->has_null()) {
119
4
                    state->null_in_set = true;
120
4
                } else {
121
0
                    args_column_ptr->insert_from(null_col->get_nested_column(), 0);
122
0
                }
123
19
            } else {
124
19
                args_column_ptr->insert_from(*col, 0);
125
19
            }
126
23
        }
127
9
        ColumnPtr column_ptr = std::move(args_column_ptr);
128
        // make collection ref into set
129
9
        auto col_size = column_ptr->size();
130
28
        for (size_t i = 0; i < col_size; i++) {
131
19
            state->args_set.insert({column_ptr, i});
132
19
        }
133
134
9
        return Status::OK();
135
51
    }
_ZN5doris20FunctionCollectionInILb1EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE
Line
Count
Source
96
9
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
97
9
        if (scope == FunctionContext::THREAD_LOCAL) {
98
6
            return Status::OK();
99
6
        }
100
3
        int num_args = context->get_num_args();
101
3
        DCHECK(num_args >= 1);
102
103
3
        std::shared_ptr<CollectionInState> state = std::make_shared<CollectionInState>();
104
3
        context->set_function_state(scope, state);
105
106
3
        DataTypePtr args_type = remove_nullable(context->get_arg_type(0));
107
3
        MutableColumnPtr args_column_ptr = args_type->create_column();
108
109
10
        for (int i = 1; i < num_args; i++) {
110
            // FE should make element type consistent and
111
            // equalize the length of the elements in struct
112
7
            const auto& const_column_ptr = context->get_constant_col(i);
113
            // Types like struct, array, and map only support constant expressions.
114
7
            DCHECK(const_column_ptr != nullptr);
115
7
            const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr);
116
7
            if (col->is_nullable()) {
117
1
                const auto* null_col = check_and_get_column<ColumnNullable>(col.get());
118
1
                if (null_col->has_null()) {
119
1
                    state->null_in_set = true;
120
1
                } else {
121
0
                    args_column_ptr->insert_from(null_col->get_nested_column(), 0);
122
0
                }
123
6
            } else {
124
6
                args_column_ptr->insert_from(*col, 0);
125
6
            }
126
7
        }
127
3
        ColumnPtr column_ptr = std::move(args_column_ptr);
128
        // make collection ref into set
129
3
        auto col_size = column_ptr->size();
130
9
        for (size_t i = 0; i < col_size; i++) {
131
6
            state->args_set.insert({column_ptr, i});
132
6
        }
133
134
3
        return Status::OK();
135
9
    }
136
137
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
138
18
                        uint32_t result, size_t input_rows_count) const override {
139
18
        auto in_state = reinterpret_cast<CollectionInState*>(
140
18
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
141
18
        if (!in_state) {
142
0
            return Status::RuntimeError("function context for function '{}' must have Set;",
143
0
                                        get_name());
144
0
        }
145
18
        const auto& args_set = in_state->args_set;
146
18
        const bool null_in_set = in_state->null_in_set;
147
18
        auto res = ColumnUInt8::create();
148
18
        ColumnUInt8::Container& vec_res = res->get_data();
149
18
        vec_res.resize(input_rows_count);
150
151
18
        ColumnUInt8::MutablePtr col_null_map_to;
152
18
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
153
18
        auto& vec_null_map_to = col_null_map_to->get_data();
154
155
18
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
156
18
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
157
18
        auto materialized_column_not_null = materialized_column;
158
18
        if (materialized_column_not_null->is_nullable()) {
159
12
            materialized_column_not_null = assert_cast<ColumnPtr>(
160
12
                    check_and_get_column<ColumnNullable>(materialized_column_not_null.get())
161
12
                            ->get_nested_column_ptr());
162
12
        }
163
164
36
        for (size_t i = 0; i < input_rows_count; ++i) {
165
18
            bool find = args_set.find({materialized_column_not_null, i}) != args_set.end();
166
167
18
            if constexpr (negative) {
168
3
                vec_res[i] = !find;
169
15
            } else {
170
15
                vec_res[i] = find;
171
15
            }
172
173
18
            if (null_in_set) {
174
8
                vec_null_map_to[i] = negative == vec_res[i];
175
10
            } else {
176
10
                vec_null_map_to[i] = false;
177
10
            }
178
18
        }
179
180
18
        if (block.get_by_position(result).type->is_nullable()) {
181
14
            block.replace_by_position(
182
14
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
183
14
        } else {
184
4
            block.replace_by_position(result, std::move(res));
185
4
        }
186
18
        return Status::OK();
187
18
    }
_ZNK5doris20FunctionCollectionInILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
138
15
                        uint32_t result, size_t input_rows_count) const override {
139
15
        auto in_state = reinterpret_cast<CollectionInState*>(
140
15
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
141
15
        if (!in_state) {
142
0
            return Status::RuntimeError("function context for function '{}' must have Set;",
143
0
                                        get_name());
144
0
        }
145
15
        const auto& args_set = in_state->args_set;
146
15
        const bool null_in_set = in_state->null_in_set;
147
15
        auto res = ColumnUInt8::create();
148
15
        ColumnUInt8::Container& vec_res = res->get_data();
149
15
        vec_res.resize(input_rows_count);
150
151
15
        ColumnUInt8::MutablePtr col_null_map_to;
152
15
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
153
15
        auto& vec_null_map_to = col_null_map_to->get_data();
154
155
15
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
156
15
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
157
15
        auto materialized_column_not_null = materialized_column;
158
15
        if (materialized_column_not_null->is_nullable()) {
159
12
            materialized_column_not_null = assert_cast<ColumnPtr>(
160
12
                    check_and_get_column<ColumnNullable>(materialized_column_not_null.get())
161
12
                            ->get_nested_column_ptr());
162
12
        }
163
164
30
        for (size_t i = 0; i < input_rows_count; ++i) {
165
15
            bool find = args_set.find({materialized_column_not_null, i}) != args_set.end();
166
167
            if constexpr (negative) {
168
                vec_res[i] = !find;
169
15
            } else {
170
15
                vec_res[i] = find;
171
15
            }
172
173
15
            if (null_in_set) {
174
7
                vec_null_map_to[i] = negative == vec_res[i];
175
8
            } else {
176
8
                vec_null_map_to[i] = false;
177
8
            }
178
15
        }
179
180
15
        if (block.get_by_position(result).type->is_nullable()) {
181
13
            block.replace_by_position(
182
13
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
183
13
        } else {
184
2
            block.replace_by_position(result, std::move(res));
185
2
        }
186
15
        return Status::OK();
187
15
    }
_ZNK5doris20FunctionCollectionInILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
138
3
                        uint32_t result, size_t input_rows_count) const override {
139
3
        auto in_state = reinterpret_cast<CollectionInState*>(
140
3
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
141
3
        if (!in_state) {
142
0
            return Status::RuntimeError("function context for function '{}' must have Set;",
143
0
                                        get_name());
144
0
        }
145
3
        const auto& args_set = in_state->args_set;
146
3
        const bool null_in_set = in_state->null_in_set;
147
3
        auto res = ColumnUInt8::create();
148
3
        ColumnUInt8::Container& vec_res = res->get_data();
149
3
        vec_res.resize(input_rows_count);
150
151
3
        ColumnUInt8::MutablePtr col_null_map_to;
152
3
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
153
3
        auto& vec_null_map_to = col_null_map_to->get_data();
154
155
3
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
156
3
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
157
3
        auto materialized_column_not_null = materialized_column;
158
3
        if (materialized_column_not_null->is_nullable()) {
159
0
            materialized_column_not_null = assert_cast<ColumnPtr>(
160
0
                    check_and_get_column<ColumnNullable>(materialized_column_not_null.get())
161
0
                            ->get_nested_column_ptr());
162
0
        }
163
164
6
        for (size_t i = 0; i < input_rows_count; ++i) {
165
3
            bool find = args_set.find({materialized_column_not_null, i}) != args_set.end();
166
167
3
            if constexpr (negative) {
168
3
                vec_res[i] = !find;
169
            } else {
170
                vec_res[i] = find;
171
            }
172
173
3
            if (null_in_set) {
174
1
                vec_null_map_to[i] = negative == vec_res[i];
175
2
            } else {
176
2
                vec_null_map_to[i] = false;
177
2
            }
178
3
        }
179
180
3
        if (block.get_by_position(result).type->is_nullable()) {
181
1
            block.replace_by_position(
182
1
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
183
2
        } else {
184
2
            block.replace_by_position(result, std::move(res));
185
2
        }
186
3
        return Status::OK();
187
3
    }
188
};
189
190
} // namespace doris