Coverage Report

Created: 2026-05-12 13:04

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/in.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
19
#pragma once
20
21
#include <glog/logging.h>
22
23
#include <boost/iterator/iterator_facade.hpp>
24
#include <cstddef>
25
#include <memory>
26
#include <utility>
27
#include <vector>
28
29
#include "common/status.h"
30
#include "core/block/block.h"
31
#include "core/block/column_numbers.h"
32
#include "core/block/column_with_type_and_name.h"
33
#include "core/column/column.h"
34
#include "core/column/column_const.h"
35
#include "core/column/column_nullable.h"
36
#include "core/column/column_vector.h"
37
#include "core/data_type/data_type.h"
38
#include "core/data_type/data_type_nullable.h"
39
#include "core/data_type/data_type_number.h"
40
#include "core/data_type/define_primitive_type.h"
41
#include "core/field.h"
42
#include "core/string_ref.h"
43
#include "core/types.h"
44
#include "exprs/aggregate/aggregate_function.h"
45
#include "exprs/create_predicate_function.h"
46
#include "exprs/function/function.h"
47
#include "exprs/function_context.h"
48
#include "exprs/hybrid_set.h"
49
#include "storage/index/index_reader_helper.h"
50
51
namespace doris {
52
53
template <typename T>
54
class ColumnStr;
55
using ColumnString = ColumnStr<UInt32>;
56
57
struct InState {
58
    bool use_set = true;
59
    std::shared_ptr<HybridSetBase> hybrid_set;
60
};
61
62
template <bool negative>
63
class FunctionIn : public IFunction {
64
public:
65
    static constexpr auto name = negative ? "not_in" : "in";
66
67
1.21k
    static FunctionPtr create() { return std::make_shared<FunctionIn>(); }
_ZN5doris10FunctionInILb0EE6createEv
Line
Count
Source
67
1.03k
    static FunctionPtr create() { return std::make_shared<FunctionIn>(); }
_ZN5doris10FunctionInILb1EE6createEv
Line
Count
Source
67
176
    static FunctionPtr create() { return std::make_shared<FunctionIn>(); }
68
69
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris10FunctionInILb0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris10FunctionInILb1EE8get_nameB5cxx11Ev
70
71
1.19k
    bool is_variadic() const override { return true; }
_ZNK5doris10FunctionInILb0EE11is_variadicEv
Line
Count
Source
71
1.02k
    bool is_variadic() const override { return true; }
_ZNK5doris10FunctionInILb1EE11is_variadicEv
Line
Count
Source
71
168
    bool is_variadic() const override { return true; }
72
73
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris10FunctionInILb0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10FunctionInILb1EE23get_number_of_argumentsEv
74
75
1.19k
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
76
2.49k
        for (const auto& arg : args) {
77
2.49k
            if (arg->is_nullable()) {
78
686
                return make_nullable(std::make_shared<DataTypeUInt8>());
79
686
            }
80
2.49k
        }
81
509
        return std::make_shared<DataTypeUInt8>();
82
1.19k
    }
_ZNK5doris10FunctionInILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
75
1.02k
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
76
2.22k
        for (const auto& arg : args) {
77
2.22k
            if (arg->is_nullable()) {
78
557
                return make_nullable(std::make_shared<DataTypeUInt8>());
79
557
            }
80
2.22k
        }
81
471
        return std::make_shared<DataTypeUInt8>();
82
1.02k
    }
_ZNK5doris10FunctionInILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
75
167
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
76
274
        for (const auto& arg : args) {
77
274
            if (arg->is_nullable()) {
78
129
                return make_nullable(std::make_shared<DataTypeUInt8>());
79
129
            }
80
274
        }
81
38
        return std::make_shared<DataTypeUInt8>();
82
167
    }
83
84
3.62k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris10FunctionInILb0EE36use_default_implementation_for_nullsEv
Line
Count
Source
84
2.66k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris10FunctionInILb1EE36use_default_implementation_for_nullsEv
Line
Count
Source
84
963
    bool use_default_implementation_for_nulls() const override { return false; }
85
86
    // size of [ in ( 1 , 2  , 3 , null) ]  is 3
87
1.19k
    size_t get_size_with_out_null(FunctionContext* context) {
88
1.19k
        if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) {
89
12
            return context->get_num_args() - 1;
90
12
        }
91
1.17k
        size_t sz = 0;
92
3.97k
        for (int i = 1; i < context->get_num_args(); ++i) {
93
2.79k
            const auto& const_column_ptr = context->get_constant_col(i);
94
2.79k
            if (const_column_ptr != nullptr) {
95
2.79k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
96
2.79k
                if (const_data.data != nullptr) {
97
2.75k
                    sz++;
98
2.75k
                }
99
2.79k
            }
100
2.79k
        }
101
1.17k
        return sz;
102
1.19k
    }
_ZN5doris10FunctionInILb0EE22get_size_with_out_nullEPNS_15FunctionContextE
Line
Count
Source
87
1.02k
    size_t get_size_with_out_null(FunctionContext* context) {
88
1.02k
        if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) {
89
12
            return context->get_num_args() - 1;
90
12
        }
91
1.01k
        size_t sz = 0;
92
3.39k
        for (int i = 1; i < context->get_num_args(); ++i) {
93
2.38k
            const auto& const_column_ptr = context->get_constant_col(i);
94
2.38k
            if (const_column_ptr != nullptr) {
95
2.38k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
96
2.38k
                if (const_data.data != nullptr) {
97
2.36k
                    sz++;
98
2.36k
                }
99
2.38k
            }
100
2.38k
        }
101
1.01k
        return sz;
102
1.02k
    }
_ZN5doris10FunctionInILb1EE22get_size_with_out_nullEPNS_15FunctionContextE
Line
Count
Source
87
165
    size_t get_size_with_out_null(FunctionContext* context) {
88
165
        if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) {
89
0
            return context->get_num_args() - 1;
90
0
        }
91
165
        size_t sz = 0;
92
581
        for (int i = 1; i < context->get_num_args(); ++i) {
93
416
            const auto& const_column_ptr = context->get_constant_col(i);
94
416
            if (const_column_ptr != nullptr) {
95
416
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
96
416
                if (const_data.data != nullptr) {
97
396
                    sz++;
98
396
                }
99
416
            }
100
416
        }
101
165
        return sz;
102
165
    }
103
104
9.23k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
105
9.23k
        if (scope == FunctionContext::THREAD_LOCAL) {
106
8.04k
            return Status::OK();
107
8.04k
        }
108
1.18k
        std::shared_ptr<InState> state = std::make_shared<InState>();
109
1.18k
        context->set_function_state(scope, state);
110
1.18k
        DCHECK(context->get_num_args() >= 1);
111
1.18k
        if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) {
112
0
            state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true));
113
1.18k
        } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR ||
114
1.18k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR ||
115
1.18k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) {
116
            // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
117
303
            state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true));
118
885
        } else {
119
885
            state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
120
885
                                               get_size_with_out_null(context), true));
121
885
        }
122
123
4.14k
        for (int i = 1; i < context->get_num_args(); ++i) {
124
2.95k
            const auto& const_column_ptr = context->get_constant_col(i);
125
2.95k
            if (const_column_ptr != nullptr) {
126
2.95k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
127
2.95k
                state->hybrid_set->insert((void*)const_data.data, const_data.size);
128
2.95k
            } else {
129
1
                state->use_set = false;
130
1
                state->hybrid_set.reset();
131
1
                break;
132
1
            }
133
2.95k
        }
134
1.18k
        return Status::OK();
135
9.23k
    }
_ZN5doris10FunctionInILb0EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE
Line
Count
Source
104
6.83k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
105
6.83k
        if (scope == FunctionContext::THREAD_LOCAL) {
106
5.81k
            return Status::OK();
107
5.81k
        }
108
1.02k
        std::shared_ptr<InState> state = std::make_shared<InState>();
109
1.02k
        context->set_function_state(scope, state);
110
1.02k
        DCHECK(context->get_num_args() >= 1);
111
1.02k
        if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) {
112
0
            state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true));
113
1.02k
        } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR ||
114
1.02k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR ||
115
1.02k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) {
116
            // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
117
220
            state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true));
118
803
        } else {
119
803
            state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
120
803
                                               get_size_with_out_null(context), true));
121
803
        }
122
123
3.56k
        for (int i = 1; i < context->get_num_args(); ++i) {
124
2.54k
            const auto& const_column_ptr = context->get_constant_col(i);
125
2.54k
            if (const_column_ptr != nullptr) {
126
2.53k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
127
2.53k
                state->hybrid_set->insert((void*)const_data.data, const_data.size);
128
2.53k
            } else {
129
1
                state->use_set = false;
130
1
                state->hybrid_set.reset();
131
1
                break;
132
1
            }
133
2.54k
        }
134
1.02k
        return Status::OK();
135
6.83k
    }
_ZN5doris10FunctionInILb1EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE
Line
Count
Source
104
2.39k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
105
2.39k
        if (scope == FunctionContext::THREAD_LOCAL) {
106
2.22k
            return Status::OK();
107
2.22k
        }
108
165
        std::shared_ptr<InState> state = std::make_shared<InState>();
109
165
        context->set_function_state(scope, state);
110
165
        DCHECK(context->get_num_args() >= 1);
111
165
        if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) {
112
0
            state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true));
113
165
        } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR ||
114
165
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR ||
115
165
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) {
116
            // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
117
83
            state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true));
118
83
        } else {
119
82
            state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
120
82
                                               get_size_with_out_null(context), true));
121
82
        }
122
123
581
        for (int i = 1; i < context->get_num_args(); ++i) {
124
416
            const auto& const_column_ptr = context->get_constant_col(i);
125
416
            if (const_column_ptr != nullptr) {
126
416
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
127
416
                state->hybrid_set->insert((void*)const_data.data, const_data.size);
128
416
            } else {
129
0
                state->use_set = false;
130
0
                state->hybrid_set.reset();
131
0
                break;
132
0
            }
133
416
        }
134
165
        return Status::OK();
135
2.39k
    }
136
137
    Status evaluate_inverted_index(
138
            const ColumnsWithTypeAndName& arguments,
139
            const std::vector<IndexFieldNameAndTypePair>& data_type_with_names,
140
            std::vector<segment_v2::IndexIterator*> iterators, uint32_t num_rows,
141
            const InvertedIndexAnalyzerCtx* analyzer_ctx,
142
716
            segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
143
716
        DCHECK(data_type_with_names.size() == 1);
144
716
        DCHECK(iterators.size() == 1);
145
716
        auto* iter = iterators[0];
146
716
        auto data_type_with_name = data_type_with_names[0];
147
716
        std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
148
716
        std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
149
150
716
        if (iter == nullptr) {
151
0
            return Status::OK();
152
0
        }
153
716
        if (!segment_v2::IndexReaderHelper::has_string_or_bkd_index(iter)) {
154
            //NOT support in list when parser is FULLTEXT for expr inverted index evaluate.
155
140
            return Status::OK();
156
140
        }
157
576
        if (iter->has_null()) {
158
576
            segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
159
576
            RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
160
576
            null_bitmap = null_bitmap_cache_handle.get_bitmap();
161
576
        }
162
718
        for (const auto& arg : arguments) {
163
718
            Field param_value;
164
718
            arg.column->get(0, param_value);
165
718
            if (param_value.is_null()) {
166
                // predicate like column NOT IN (NULL, '') should not push down to index.
167
508
                if (negative) {
168
505
                    return Status::OK();
169
505
                }
170
3
                *roaring |= *null_bitmap;
171
3
                continue;
172
508
            }
173
210
            InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
174
210
            segment_v2::InvertedIndexParam param;
175
210
            param.column_name = data_type_with_name.first;
176
210
            param.column_type = data_type_with_name.second;
177
210
            param.query_value = param_value;
178
210
            param.query_type = query_type;
179
210
            param.num_rows = num_rows;
180
210
            param.roaring = std::make_shared<roaring::Roaring>();
181
210
            param.analyzer_ctx = analyzer_ctx;
182
210
            RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
183
203
            *roaring |= *param.roaring;
184
203
        }
185
64
        segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
186
64
        bitmap_result = result;
187
64
        bitmap_result.mask_out_null();
188
64
        if constexpr (negative) {
189
31
            roaring::Roaring full_result;
190
31
            full_result.addRange(0, num_rows);
191
31
            bitmap_result.op_not(&full_result);
192
31
        }
193
64
        return Status::OK();
194
576
    }
_ZNK5doris10FunctionInILb0EE23evaluate_inverted_indexERKSt6vectorINS_21ColumnWithTypeAndNameESaIS3_EERKS2_ISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt10shared_ptrIKNS_9IDataTypeEEESaISJ_EES2_IPNS_10segment_v213IndexIteratorESaISQ_EEjPKNS_24InvertedIndexAnalyzerCtxERNSO_25InvertedIndexResultBitmapE
Line
Count
Source
142
97
            segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
143
97
        DCHECK(data_type_with_names.size() == 1);
144
97
        DCHECK(iterators.size() == 1);
145
97
        auto* iter = iterators[0];
146
97
        auto data_type_with_name = data_type_with_names[0];
147
97
        std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
148
97
        std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
149
150
97
        if (iter == nullptr) {
151
0
            return Status::OK();
152
0
        }
153
97
        if (!segment_v2::IndexReaderHelper::has_string_or_bkd_index(iter)) {
154
            //NOT support in list when parser is FULLTEXT for expr inverted index evaluate.
155
64
            return Status::OK();
156
64
        }
157
33
        if (iter->has_null()) {
158
33
            segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
159
33
            RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
160
33
            null_bitmap = null_bitmap_cache_handle.get_bitmap();
161
33
        }
162
66
        for (const auto& arg : arguments) {
163
66
            Field param_value;
164
66
            arg.column->get(0, param_value);
165
66
            if (param_value.is_null()) {
166
                // predicate like column NOT IN (NULL, '') should not push down to index.
167
3
                if (negative) {
168
0
                    return Status::OK();
169
0
                }
170
3
                *roaring |= *null_bitmap;
171
3
                continue;
172
3
            }
173
63
            InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
174
63
            segment_v2::InvertedIndexParam param;
175
63
            param.column_name = data_type_with_name.first;
176
63
            param.column_type = data_type_with_name.second;
177
63
            param.query_value = param_value;
178
63
            param.query_type = query_type;
179
63
            param.num_rows = num_rows;
180
63
            param.roaring = std::make_shared<roaring::Roaring>();
181
63
            param.analyzer_ctx = analyzer_ctx;
182
63
            RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
183
63
            *roaring |= *param.roaring;
184
63
        }
185
33
        segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
186
33
        bitmap_result = result;
187
33
        bitmap_result.mask_out_null();
188
        if constexpr (negative) {
189
            roaring::Roaring full_result;
190
            full_result.addRange(0, num_rows);
191
            bitmap_result.op_not(&full_result);
192
        }
193
33
        return Status::OK();
194
33
    }
_ZNK5doris10FunctionInILb1EE23evaluate_inverted_indexERKSt6vectorINS_21ColumnWithTypeAndNameESaIS3_EERKS2_ISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt10shared_ptrIKNS_9IDataTypeEEESaISJ_EES2_IPNS_10segment_v213IndexIteratorESaISQ_EEjPKNS_24InvertedIndexAnalyzerCtxERNSO_25InvertedIndexResultBitmapE
Line
Count
Source
142
619
            segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
143
619
        DCHECK(data_type_with_names.size() == 1);
144
619
        DCHECK(iterators.size() == 1);
145
619
        auto* iter = iterators[0];
146
619
        auto data_type_with_name = data_type_with_names[0];
147
619
        std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
148
619
        std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
149
150
619
        if (iter == nullptr) {
151
0
            return Status::OK();
152
0
        }
153
619
        if (!segment_v2::IndexReaderHelper::has_string_or_bkd_index(iter)) {
154
            //NOT support in list when parser is FULLTEXT for expr inverted index evaluate.
155
76
            return Status::OK();
156
76
        }
157
543
        if (iter->has_null()) {
158
543
            segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
159
543
            RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
160
543
            null_bitmap = null_bitmap_cache_handle.get_bitmap();
161
543
        }
162
652
        for (const auto& arg : arguments) {
163
652
            Field param_value;
164
652
            arg.column->get(0, param_value);
165
652
            if (param_value.is_null()) {
166
                // predicate like column NOT IN (NULL, '') should not push down to index.
167
505
                if (negative) {
168
505
                    return Status::OK();
169
505
                }
170
0
                *roaring |= *null_bitmap;
171
0
                continue;
172
505
            }
173
147
            InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
174
147
            segment_v2::InvertedIndexParam param;
175
147
            param.column_name = data_type_with_name.first;
176
147
            param.column_type = data_type_with_name.second;
177
147
            param.query_value = param_value;
178
147
            param.query_type = query_type;
179
147
            param.num_rows = num_rows;
180
147
            param.roaring = std::make_shared<roaring::Roaring>();
181
147
            param.analyzer_ctx = analyzer_ctx;
182
147
            RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
183
140
            *roaring |= *param.roaring;
184
140
        }
185
31
        segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
186
31
        bitmap_result = result;
187
31
        bitmap_result.mask_out_null();
188
31
        if constexpr (negative) {
189
31
            roaring::Roaring full_result;
190
31
            full_result.addRange(0, num_rows);
191
31
            bitmap_result.op_not(&full_result);
192
31
        }
193
31
        return Status::OK();
194
543
    }
195
196
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
197
2.43k
                        uint32_t result, size_t input_rows_count) const override {
198
2.43k
        auto* in_state = reinterpret_cast<InState*>(
199
2.43k
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
200
2.43k
        if (!in_state) {
201
0
            return Status::RuntimeError("funciton context for function '{}' must have Set;",
202
0
                                        get_name());
203
0
        }
204
2.43k
        auto res = ColumnUInt8::create();
205
2.43k
        ColumnUInt8::Container& vec_res = res->get_data();
206
2.43k
        vec_res.resize(input_rows_count);
207
208
2.43k
        ColumnUInt8::MutablePtr col_null_map_to;
209
2.43k
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
210
2.43k
        auto& vec_null_map_to = col_null_map_to->get_data();
211
212
2.43k
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
213
2.43k
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
214
215
2.43k
        if (in_state->use_set) {
216
2.43k
            if (materialized_column->is_nullable()) {
217
1.78k
                const auto* null_col_ptr =
218
1.78k
                        check_and_get_column<ColumnNullable>(materialized_column.get());
219
1.78k
                const auto& null_map = null_col_ptr->get_null_map_column().get_data();
220
1.78k
                const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get();
221
222
1.78k
                if (nested_col_ptr->is_column_string()) {
223
1.19k
                    const auto* column_string_ptr =
224
1.19k
                            assert_cast<const ColumnString*>(nested_col_ptr);
225
1.19k
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
226
1.19k
                                               column_string_ptr);
227
1.19k
                } else {
228
                    //TODO: support other column type
229
591
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
230
591
                                               nested_col_ptr);
231
591
                }
232
233
1.78k
                if (!in_state->hybrid_set->contain_null()) {
234
215k
                    for (size_t i = 0; i < input_rows_count; ++i) {
235
213k
                        vec_null_map_to[i] = null_map[i];
236
213k
                    }
237
1.27k
                } else {
238
11.8k
                    for (size_t i = 0; i < input_rows_count; ++i) {
239
11.3k
                        vec_null_map_to[i] = null_map[i] || negative == vec_res[i];
240
11.3k
                    }
241
517
                }
242
243
1.78k
            } else { // non-nullable
244
643
                if (is_string_type(left_arg.type->get_primitive_type())) {
245
378
                    const auto* column_string_ptr =
246
378
                            assert_cast<const ColumnString*>(materialized_column.get());
247
378
                    search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr);
248
378
                } else {
249
265
                    search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get());
250
265
                }
251
252
643
                if (in_state->hybrid_set->contain_null()) {
253
81
                    for (size_t i = 0; i < input_rows_count; ++i) {
254
51
                        vec_null_map_to[i] = negative == vec_res[i];
255
51
                    }
256
30
                }
257
643
            }
258
2.43k
        } else { //!in_state->use_set
259
0
            std::vector<ColumnPtr> set_columns;
260
2
            for (int i = 1; i < arguments.size(); ++i) {
261
2
                set_columns.emplace_back(block.get_by_position(arguments[i]).column);
262
2
            }
263
0
            if (col_const) {
264
0
                impl_without_set<true>(context, set_columns, input_rows_count, vec_res,
265
0
                                       vec_null_map_to, materialized_column);
266
0
            } else {
267
0
                impl_without_set<false>(context, set_columns, input_rows_count, vec_res,
268
0
                                        vec_null_map_to, materialized_column);
269
0
            }
270
0
        }
271
272
2.43k
        if (block.get_by_position(result).type->is_nullable()) {
273
1.81k
            block.replace_by_position(
274
1.81k
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
275
1.81k
        } else {
276
612
            block.replace_by_position(result, std::move(res));
277
612
        }
278
279
2.43k
        return Status::OK();
280
2.43k
    }
_ZNK5doris10FunctionInILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
197
1.63k
                        uint32_t result, size_t input_rows_count) const override {
198
1.63k
        auto* in_state = reinterpret_cast<InState*>(
199
1.63k
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
200
1.63k
        if (!in_state) {
201
0
            return Status::RuntimeError("funciton context for function '{}' must have Set;",
202
0
                                        get_name());
203
0
        }
204
1.63k
        auto res = ColumnUInt8::create();
205
1.63k
        ColumnUInt8::Container& vec_res = res->get_data();
206
1.63k
        vec_res.resize(input_rows_count);
207
208
1.63k
        ColumnUInt8::MutablePtr col_null_map_to;
209
1.63k
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
210
1.63k
        auto& vec_null_map_to = col_null_map_to->get_data();
211
212
1.63k
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
213
1.63k
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
214
215
1.63k
        if (in_state->use_set) {
216
1.63k
            if (materialized_column->is_nullable()) {
217
1.02k
                const auto* null_col_ptr =
218
1.02k
                        check_and_get_column<ColumnNullable>(materialized_column.get());
219
1.02k
                const auto& null_map = null_col_ptr->get_null_map_column().get_data();
220
1.02k
                const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get();
221
222
1.02k
                if (nested_col_ptr->is_column_string()) {
223
537
                    const auto* column_string_ptr =
224
537
                            assert_cast<const ColumnString*>(nested_col_ptr);
225
537
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
226
537
                                               column_string_ptr);
227
537
                } else {
228
                    //TODO: support other column type
229
488
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
230
488
                                               nested_col_ptr);
231
488
                }
232
233
1.02k
                if (!in_state->hybrid_set->contain_null()) {
234
210k
                    for (size_t i = 0; i < input_rows_count; ++i) {
235
209k
                        vec_null_map_to[i] = null_map[i];
236
209k
                    }
237
1.00k
                } else {
238
85
                    for (size_t i = 0; i < input_rows_count; ++i) {
239
60
                        vec_null_map_to[i] = null_map[i] || negative == vec_res[i];
240
60
                    }
241
25
                }
242
243
1.02k
            } else { // non-nullable
244
610
                if (is_string_type(left_arg.type->get_primitive_type())) {
245
378
                    const auto* column_string_ptr =
246
378
                            assert_cast<const ColumnString*>(materialized_column.get());
247
378
                    search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr);
248
378
                } else {
249
232
                    search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get());
250
232
                }
251
252
610
                if (in_state->hybrid_set->contain_null()) {
253
2
                    for (size_t i = 0; i < input_rows_count; ++i) {
254
1
                        vec_null_map_to[i] = negative == vec_res[i];
255
1
                    }
256
1
                }
257
610
            }
258
1.63k
        } else { //!in_state->use_set
259
1
            std::vector<ColumnPtr> set_columns;
260
3
            for (int i = 1; i < arguments.size(); ++i) {
261
2
                set_columns.emplace_back(block.get_by_position(arguments[i]).column);
262
2
            }
263
1
            if (col_const) {
264
0
                impl_without_set<true>(context, set_columns, input_rows_count, vec_res,
265
0
                                       vec_null_map_to, materialized_column);
266
1
            } else {
267
1
                impl_without_set<false>(context, set_columns, input_rows_count, vec_res,
268
1
                                        vec_null_map_to, materialized_column);
269
1
            }
270
1
        }
271
272
1.63k
        if (block.get_by_position(result).type->is_nullable()) {
273
1.02k
            block.replace_by_position(
274
1.02k
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
275
1.02k
        } else {
276
609
            block.replace_by_position(result, std::move(res));
277
609
        }
278
279
1.63k
        return Status::OK();
280
1.63k
    }
_ZNK5doris10FunctionInILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
197
795
                        uint32_t result, size_t input_rows_count) const override {
198
795
        auto* in_state = reinterpret_cast<InState*>(
199
795
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
200
795
        if (!in_state) {
201
0
            return Status::RuntimeError("funciton context for function '{}' must have Set;",
202
0
                                        get_name());
203
0
        }
204
795
        auto res = ColumnUInt8::create();
205
795
        ColumnUInt8::Container& vec_res = res->get_data();
206
795
        vec_res.resize(input_rows_count);
207
208
795
        ColumnUInt8::MutablePtr col_null_map_to;
209
795
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
210
795
        auto& vec_null_map_to = col_null_map_to->get_data();
211
212
795
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
213
795
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
214
215
796
        if (in_state->use_set) {
216
796
            if (materialized_column->is_nullable()) {
217
763
                const auto* null_col_ptr =
218
763
                        check_and_get_column<ColumnNullable>(materialized_column.get());
219
763
                const auto& null_map = null_col_ptr->get_null_map_column().get_data();
220
763
                const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get();
221
222
763
                if (nested_col_ptr->is_column_string()) {
223
660
                    const auto* column_string_ptr =
224
660
                            assert_cast<const ColumnString*>(nested_col_ptr);
225
660
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
226
660
                                               column_string_ptr);
227
660
                } else {
228
                    //TODO: support other column type
229
103
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
230
103
                                               nested_col_ptr);
231
103
                }
232
233
763
                if (!in_state->hybrid_set->contain_null()) {
234
4.92k
                    for (size_t i = 0; i < input_rows_count; ++i) {
235
4.65k
                        vec_null_map_to[i] = null_map[i];
236
4.65k
                    }
237
492
                } else {
238
11.7k
                    for (size_t i = 0; i < input_rows_count; ++i) {
239
11.2k
                        vec_null_map_to[i] = null_map[i] || negative == vec_res[i];
240
11.2k
                    }
241
492
                }
242
243
763
            } else { // non-nullable
244
33
                if (is_string_type(left_arg.type->get_primitive_type())) {
245
0
                    const auto* column_string_ptr =
246
0
                            assert_cast<const ColumnString*>(materialized_column.get());
247
0
                    search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr);
248
33
                } else {
249
33
                    search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get());
250
33
                }
251
252
33
                if (in_state->hybrid_set->contain_null()) {
253
79
                    for (size_t i = 0; i < input_rows_count; ++i) {
254
50
                        vec_null_map_to[i] = negative == vec_res[i];
255
50
                    }
256
29
                }
257
33
            }
258
18.4E
        } else { //!in_state->use_set
259
18.4E
            std::vector<ColumnPtr> set_columns;
260
18.4E
            for (int i = 1; i < arguments.size(); ++i) {
261
0
                set_columns.emplace_back(block.get_by_position(arguments[i]).column);
262
0
            }
263
18.4E
            if (col_const) {
264
0
                impl_without_set<true>(context, set_columns, input_rows_count, vec_res,
265
0
                                       vec_null_map_to, materialized_column);
266
18.4E
            } else {
267
18.4E
                impl_without_set<false>(context, set_columns, input_rows_count, vec_res,
268
18.4E
                                        vec_null_map_to, materialized_column);
269
18.4E
            }
270
18.4E
        }
271
272
795
        if (block.get_by_position(result).type->is_nullable()) {
273
792
            block.replace_by_position(
274
792
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
275
792
        } else {
276
3
            block.replace_by_position(result, std::move(res));
277
3
        }
278
279
795
        return Status::OK();
280
795
    }
281
282
private:
283
    template <typename T>
284
    static void search_hash_set_check_null(InState* in_state, size_t input_rows_count,
285
                                           ColumnUInt8::Container& vec_res,
286
1.78k
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
1.78k
        if constexpr (!negative) {
288
1.02k
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
1.02k
                                                      vec_res);
290
1.02k
        } else {
291
763
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
763
                                                               vec_res);
293
763
        }
294
1.78k
    }
_ZN5doris10FunctionInILb0EE26search_hash_set_check_nullIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSC_PT_
Line
Count
Source
286
537
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
537
        if constexpr (!negative) {
288
537
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
537
                                                      vec_res);
290
        } else {
291
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
                                                               vec_res);
293
        }
294
537
    }
_ZN5doris10FunctionInILb0EE26search_hash_set_check_nullIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSB_PT_
Line
Count
Source
286
488
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
488
        if constexpr (!negative) {
288
488
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
488
                                                      vec_res);
290
        } else {
291
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
                                                               vec_res);
293
        }
294
488
    }
_ZN5doris10FunctionInILb1EE26search_hash_set_check_nullIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSC_PT_
Line
Count
Source
286
660
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
        if constexpr (!negative) {
288
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
                                                      vec_res);
290
660
        } else {
291
660
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
660
                                                               vec_res);
293
660
        }
294
660
    }
_ZN5doris10FunctionInILb1EE26search_hash_set_check_nullIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSB_PT_
Line
Count
Source
286
103
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
        if constexpr (!negative) {
288
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
                                                      vec_res);
290
103
        } else {
291
103
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
103
                                                               vec_res);
293
103
        }
294
103
    }
295
296
    template <typename T>
297
    static void search_hash_set(InState* in_state, size_t input_rows_count,
298
643
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
643
        if constexpr (!negative) {
300
610
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
610
        } else {
302
33
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
33
        }
304
643
    }
_ZN5doris10FunctionInILb0EE15search_hash_setIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
Line
Count
Source
298
378
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
378
        if constexpr (!negative) {
300
378
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
        } else {
302
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
        }
304
378
    }
_ZN5doris10FunctionInILb0EE15search_hash_setIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
Line
Count
Source
298
232
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
232
        if constexpr (!negative) {
300
232
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
        } else {
302
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
        }
304
232
    }
Unexecuted instantiation: _ZN5doris10FunctionInILb1EE15search_hash_setIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
_ZN5doris10FunctionInILb1EE15search_hash_setIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
Line
Count
Source
298
33
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
        if constexpr (!negative) {
300
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
33
        } else {
302
33
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
33
        }
304
33
    }
305
306
    template <bool Const>
307
    static void impl_without_set(FunctionContext* context,
308
                                 const std::vector<ColumnPtr>& set_columns, size_t input_rows_count,
309
                                 ColumnUInt8::Container& vec_res,
310
                                 ColumnUInt8::Container& vec_null_map_to,
311
1
                                 const ColumnPtr& materialized_column) {
312
2
        for (size_t i = 0; i < input_rows_count; ++i) {
313
1
            const auto& ref_data = materialized_column->get_data_at(index_check_const(i, Const));
314
1
            if (ref_data.data == nullptr) {
315
0
                vec_null_map_to[i] = true;
316
0
                continue;
317
0
            }
318
319
1
            std::vector<StringRef> set_datas;
320
            // To comply with the SQL standard, IN() returns NULL not only if the expression on the left hand side is NULL,
321
            // but also if no match is found in the list and one of the expressions in the list is NULL.
322
1
            bool null_in_set = false;
323
324
2
            for (const auto& set_column : set_columns) {
325
2
                auto set_data = set_column->get_data_at(i);
326
2
                if (set_data.data == nullptr) {
327
0
                    null_in_set = true;
328
2
                } else {
329
2
                    set_datas.push_back(set_data);
330
2
                }
331
2
            }
332
1
            std::unique_ptr<HybridSetBase> hybrid_set(create_set(
333
1
                    context->get_arg_type(0)->get_primitive_type(), set_datas.size(), true));
334
2
            for (auto& set_data : set_datas) {
335
2
                hybrid_set->insert((void*)(set_data.data), set_data.size);
336
2
            }
337
338
1
            vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size);
339
1
            if (null_in_set) {
340
0
                vec_null_map_to[i] = negative == vec_res[i];
341
1
            } else {
342
1
                vec_null_map_to[i] = false;
343
1
            }
344
1
        }
345
1
    }
Unexecuted instantiation: _ZN5doris10FunctionInILb0EE16impl_without_setILb1EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
_ZN5doris10FunctionInILb0EE16impl_without_setILb0EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
Line
Count
Source
311
1
                                 const ColumnPtr& materialized_column) {
312
2
        for (size_t i = 0; i < input_rows_count; ++i) {
313
1
            const auto& ref_data = materialized_column->get_data_at(index_check_const(i, Const));
314
1
            if (ref_data.data == nullptr) {
315
0
                vec_null_map_to[i] = true;
316
0
                continue;
317
0
            }
318
319
1
            std::vector<StringRef> set_datas;
320
            // To comply with the SQL standard, IN() returns NULL not only if the expression on the left hand side is NULL,
321
            // but also if no match is found in the list and one of the expressions in the list is NULL.
322
1
            bool null_in_set = false;
323
324
2
            for (const auto& set_column : set_columns) {
325
2
                auto set_data = set_column->get_data_at(i);
326
2
                if (set_data.data == nullptr) {
327
0
                    null_in_set = true;
328
2
                } else {
329
2
                    set_datas.push_back(set_data);
330
2
                }
331
2
            }
332
1
            std::unique_ptr<HybridSetBase> hybrid_set(create_set(
333
1
                    context->get_arg_type(0)->get_primitive_type(), set_datas.size(), true));
334
2
            for (auto& set_data : set_datas) {
335
2
                hybrid_set->insert((void*)(set_data.data), set_data.size);
336
2
            }
337
338
1
            vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size);
339
1
            if (null_in_set) {
340
0
                vec_null_map_to[i] = negative == vec_res[i];
341
1
            } else {
342
1
                vec_null_map_to[i] = false;
343
1
            }
344
1
        }
345
1
    }
Unexecuted instantiation: _ZN5doris10FunctionInILb1EE16impl_without_setILb1EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
Unexecuted instantiation: _ZN5doris10FunctionInILb1EE16impl_without_setILb0EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
346
};
347
348
} // namespace doris