Coverage Report

Created: 2026-05-16 07:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/in.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
19
#pragma once
20
21
#include <glog/logging.h>
22
23
#include <boost/iterator/iterator_facade.hpp>
24
#include <cstddef>
25
#include <memory>
26
#include <utility>
27
#include <vector>
28
29
#include "common/status.h"
30
#include "core/block/block.h"
31
#include "core/block/column_numbers.h"
32
#include "core/block/column_with_type_and_name.h"
33
#include "core/column/column.h"
34
#include "core/column/column_const.h"
35
#include "core/column/column_nullable.h"
36
#include "core/column/column_vector.h"
37
#include "core/data_type/data_type.h"
38
#include "core/data_type/data_type_nullable.h"
39
#include "core/data_type/data_type_number.h"
40
#include "core/data_type/define_primitive_type.h"
41
#include "core/field.h"
42
#include "core/string_ref.h"
43
#include "core/types.h"
44
#include "exprs/aggregate/aggregate_function.h"
45
#include "exprs/create_predicate_function.h"
46
#include "exprs/function/function.h"
47
#include "exprs/function_context.h"
48
#include "exprs/hybrid_set.h"
49
#include "storage/index/index_reader_helper.h"
50
51
namespace doris {
52
53
template <typename T>
54
class ColumnStr;
55
using ColumnString = ColumnStr<UInt32>;
56
57
struct InState {
58
    bool use_set = true;
59
    std::shared_ptr<HybridSetBase> hybrid_set;
60
};
61
62
template <bool negative>
63
class FunctionIn : public IFunction {
64
public:
65
    static constexpr auto name = negative ? "not_in" : "in";
66
67
1.26k
    static FunctionPtr create() { return std::make_shared<FunctionIn>(); }
_ZN5doris10FunctionInILb0EE6createEv
Line
Count
Source
67
1.07k
    static FunctionPtr create() { return std::make_shared<FunctionIn>(); }
_ZN5doris10FunctionInILb1EE6createEv
Line
Count
Source
67
184
    static FunctionPtr create() { return std::make_shared<FunctionIn>(); }
68
69
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris10FunctionInILb0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris10FunctionInILb1EE8get_nameB5cxx11Ev
70
71
1.24k
    bool is_variadic() const override { return true; }
_ZNK5doris10FunctionInILb0EE11is_variadicEv
Line
Count
Source
71
1.06k
    bool is_variadic() const override { return true; }
_ZNK5doris10FunctionInILb1EE11is_variadicEv
Line
Count
Source
71
176
    bool is_variadic() const override { return true; }
72
73
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris10FunctionInILb0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10FunctionInILb1EE23get_number_of_argumentsEv
74
75
1.24k
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
76
2.57k
        for (const auto& arg : args) {
77
2.57k
            if (arg->is_nullable()) {
78
727
                return make_nullable(std::make_shared<DataTypeUInt8>());
79
727
            }
80
2.57k
        }
81
516
        return std::make_shared<DataTypeUInt8>();
82
1.24k
    }
_ZNK5doris10FunctionInILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
75
1.06k
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
76
2.28k
        for (const auto& arg : args) {
77
2.28k
            if (arg->is_nullable()) {
78
590
                return make_nullable(std::make_shared<DataTypeUInt8>());
79
590
            }
80
2.28k
        }
81
478
        return std::make_shared<DataTypeUInt8>();
82
1.06k
    }
_ZNK5doris10FunctionInILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
75
175
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
76
282
        for (const auto& arg : args) {
77
282
            if (arg->is_nullable()) {
78
137
                return make_nullable(std::make_shared<DataTypeUInt8>());
79
137
            }
80
282
        }
81
38
        return std::make_shared<DataTypeUInt8>();
82
175
    }
83
84
3.56k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris10FunctionInILb0EE36use_default_implementation_for_nullsEv
Line
Count
Source
84
2.60k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris10FunctionInILb1EE36use_default_implementation_for_nullsEv
Line
Count
Source
84
959
    bool use_default_implementation_for_nulls() const override { return false; }
85
86
    // size of [ in ( 1 , 2  , 3 , null) ]  is 3
87
1.23k
    size_t get_size_with_out_null(FunctionContext* context) {
88
1.23k
        if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) {
89
12
            return context->get_num_args() - 1;
90
12
        }
91
1.22k
        size_t sz = 0;
92
4.13k
        for (int i = 1; i < context->get_num_args(); ++i) {
93
2.90k
            const auto& const_column_ptr = context->get_constant_col(i);
94
2.90k
            if (const_column_ptr != nullptr) {
95
2.90k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
96
2.90k
                if (const_data.data != nullptr) {
97
2.86k
                    sz++;
98
2.86k
                }
99
2.90k
            }
100
2.90k
        }
101
1.22k
        return sz;
102
1.23k
    }
_ZN5doris10FunctionInILb0EE22get_size_with_out_nullEPNS_15FunctionContextE
Line
Count
Source
87
1.06k
    size_t get_size_with_out_null(FunctionContext* context) {
88
1.06k
        if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) {
89
12
            return context->get_num_args() - 1;
90
12
        }
91
1.05k
        size_t sz = 0;
92
3.52k
        for (int i = 1; i < context->get_num_args(); ++i) {
93
2.47k
            const auto& const_column_ptr = context->get_constant_col(i);
94
2.47k
            if (const_column_ptr != nullptr) {
95
2.47k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
96
2.47k
                if (const_data.data != nullptr) {
97
2.45k
                    sz++;
98
2.45k
                }
99
2.47k
            }
100
2.47k
        }
101
1.05k
        return sz;
102
1.06k
    }
_ZN5doris10FunctionInILb1EE22get_size_with_out_nullEPNS_15FunctionContextE
Line
Count
Source
87
173
    size_t get_size_with_out_null(FunctionContext* context) {
88
173
        if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) {
89
0
            return context->get_num_args() - 1;
90
0
        }
91
173
        size_t sz = 0;
92
605
        for (int i = 1; i < context->get_num_args(); ++i) {
93
432
            const auto& const_column_ptr = context->get_constant_col(i);
94
432
            if (const_column_ptr != nullptr) {
95
432
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
96
432
                if (const_data.data != nullptr) {
97
412
                    sz++;
98
412
                }
99
432
            }
100
432
        }
101
173
        return sz;
102
173
    }
103
104
9.15k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
105
9.15k
        if (scope == FunctionContext::THREAD_LOCAL) {
106
7.92k
            return Status::OK();
107
7.92k
        }
108
1.23k
        std::shared_ptr<InState> state = std::make_shared<InState>();
109
1.23k
        context->set_function_state(scope, state);
110
1.23k
        DCHECK(context->get_num_args() >= 1);
111
1.23k
        if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) {
112
0
            state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true));
113
1.23k
        } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR ||
114
1.23k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR ||
115
1.23k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) {
116
            // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
117
315
            state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true));
118
919
        } else {
119
919
            state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
120
919
                                               get_size_with_out_null(context), true));
121
919
        }
122
123
4.29k
        for (int i = 1; i < context->get_num_args(); ++i) {
124
3.06k
            const auto& const_column_ptr = context->get_constant_col(i);
125
3.06k
            if (const_column_ptr != nullptr) {
126
3.06k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
127
3.06k
                state->hybrid_set->insert((void*)const_data.data, const_data.size);
128
3.06k
            } else {
129
1
                state->use_set = false;
130
1
                state->hybrid_set.reset();
131
1
                break;
132
1
            }
133
3.06k
        }
134
1.23k
        return Status::OK();
135
9.15k
    }
_ZN5doris10FunctionInILb0EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE
Line
Count
Source
104
7.15k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
105
7.15k
        if (scope == FunctionContext::THREAD_LOCAL) {
106
6.08k
            return Status::OK();
107
6.08k
        }
108
1.06k
        std::shared_ptr<InState> state = std::make_shared<InState>();
109
1.06k
        context->set_function_state(scope, state);
110
1.06k
        DCHECK(context->get_num_args() >= 1);
111
1.06k
        if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) {
112
0
            state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true));
113
1.06k
        } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR ||
114
1.06k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR ||
115
1.06k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) {
116
            // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
117
232
            state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true));
118
829
        } else {
119
829
            state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
120
829
                                               get_size_with_out_null(context), true));
121
829
        }
122
123
3.69k
        for (int i = 1; i < context->get_num_args(); ++i) {
124
2.63k
            const auto& const_column_ptr = context->get_constant_col(i);
125
2.63k
            if (const_column_ptr != nullptr) {
126
2.63k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
127
2.63k
                state->hybrid_set->insert((void*)const_data.data, const_data.size);
128
2.63k
            } else {
129
1
                state->use_set = false;
130
1
                state->hybrid_set.reset();
131
1
                break;
132
1
            }
133
2.63k
        }
134
1.06k
        return Status::OK();
135
7.15k
    }
_ZN5doris10FunctionInILb1EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE
Line
Count
Source
104
2.00k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
105
2.00k
        if (scope == FunctionContext::THREAD_LOCAL) {
106
1.83k
            return Status::OK();
107
1.83k
        }
108
173
        std::shared_ptr<InState> state = std::make_shared<InState>();
109
173
        context->set_function_state(scope, state);
110
173
        DCHECK(context->get_num_args() >= 1);
111
173
        if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) {
112
0
            state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true));
113
173
        } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR ||
114
173
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR ||
115
173
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) {
116
            // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
117
83
            state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true));
118
90
        } else {
119
90
            state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
120
90
                                               get_size_with_out_null(context), true));
121
90
        }
122
123
605
        for (int i = 1; i < context->get_num_args(); ++i) {
124
432
            const auto& const_column_ptr = context->get_constant_col(i);
125
432
            if (const_column_ptr != nullptr) {
126
432
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
127
432
                state->hybrid_set->insert((void*)const_data.data, const_data.size);
128
432
            } else {
129
0
                state->use_set = false;
130
0
                state->hybrid_set.reset();
131
0
                break;
132
0
            }
133
432
        }
134
173
        return Status::OK();
135
2.00k
    }
136
137
    Status evaluate_inverted_index(
138
            const ColumnsWithTypeAndName& arguments,
139
            const std::vector<IndexFieldNameAndTypePair>& data_type_with_names,
140
            std::vector<segment_v2::IndexIterator*> iterators, uint32_t num_rows,
141
            const InvertedIndexAnalyzerCtx* analyzer_ctx,
142
715
            segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
143
715
        DCHECK(data_type_with_names.size() == 1);
144
715
        DCHECK(iterators.size() == 1);
145
715
        auto* iter = iterators[0];
146
715
        auto data_type_with_name = data_type_with_names[0];
147
715
        std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
148
715
        std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
149
150
715
        if (iter == nullptr) {
151
0
            return Status::OK();
152
0
        }
153
715
        if (!segment_v2::IndexReaderHelper::has_string_or_bkd_index(iter)) {
154
            //NOT support in list when parser is FULLTEXT for expr inverted index evaluate.
155
140
            return Status::OK();
156
140
        }
157
576
        if (iter->has_null()) {
158
576
            segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
159
576
            RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
160
576
            null_bitmap = null_bitmap_cache_handle.get_bitmap();
161
576
        }
162
718
        for (const auto& arg : arguments) {
163
718
            Field param_value;
164
718
            arg.column->get(0, param_value);
165
718
            if (param_value.is_null()) {
166
                // predicate like column NOT IN (NULL, '') should not push down to index.
167
508
                if (negative) {
168
505
                    return Status::OK();
169
505
                }
170
3
                *roaring |= *null_bitmap;
171
3
                continue;
172
508
            }
173
210
            InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
174
210
            segment_v2::InvertedIndexParam param;
175
210
            param.column_name = data_type_with_name.first;
176
210
            param.column_type = data_type_with_name.second;
177
210
            param.query_value = param_value;
178
210
            param.query_type = query_type;
179
210
            param.num_rows = num_rows;
180
210
            param.roaring = std::make_shared<roaring::Roaring>();
181
210
            param.analyzer_ctx = analyzer_ctx;
182
210
            RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
183
203
            *roaring |= *param.roaring;
184
203
        }
185
63
        segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
186
63
        bitmap_result = result;
187
63
        bitmap_result.mask_out_null();
188
63
        if constexpr (negative) {
189
30
            roaring::Roaring full_result;
190
30
            full_result.addRange(0, num_rows);
191
30
            bitmap_result.op_not(&full_result);
192
30
        }
193
63
        return Status::OK();
194
575
    }
_ZNK5doris10FunctionInILb0EE23evaluate_inverted_indexERKSt6vectorINS_21ColumnWithTypeAndNameESaIS3_EERKS2_ISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt10shared_ptrIKNS_9IDataTypeEEESaISJ_EES2_IPNS_10segment_v213IndexIteratorESaISQ_EEjPKNS_24InvertedIndexAnalyzerCtxERNSO_25InvertedIndexResultBitmapE
Line
Count
Source
142
97
            segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
143
97
        DCHECK(data_type_with_names.size() == 1);
144
97
        DCHECK(iterators.size() == 1);
145
97
        auto* iter = iterators[0];
146
97
        auto data_type_with_name = data_type_with_names[0];
147
97
        std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
148
97
        std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
149
150
97
        if (iter == nullptr) {
151
0
            return Status::OK();
152
0
        }
153
97
        if (!segment_v2::IndexReaderHelper::has_string_or_bkd_index(iter)) {
154
            //NOT support in list when parser is FULLTEXT for expr inverted index evaluate.
155
64
            return Status::OK();
156
64
        }
157
33
        if (iter->has_null()) {
158
33
            segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
159
33
            RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
160
33
            null_bitmap = null_bitmap_cache_handle.get_bitmap();
161
33
        }
162
66
        for (const auto& arg : arguments) {
163
66
            Field param_value;
164
66
            arg.column->get(0, param_value);
165
66
            if (param_value.is_null()) {
166
                // predicate like column NOT IN (NULL, '') should not push down to index.
167
3
                if (negative) {
168
0
                    return Status::OK();
169
0
                }
170
3
                *roaring |= *null_bitmap;
171
3
                continue;
172
3
            }
173
63
            InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
174
63
            segment_v2::InvertedIndexParam param;
175
63
            param.column_name = data_type_with_name.first;
176
63
            param.column_type = data_type_with_name.second;
177
63
            param.query_value = param_value;
178
63
            param.query_type = query_type;
179
63
            param.num_rows = num_rows;
180
63
            param.roaring = std::make_shared<roaring::Roaring>();
181
63
            param.analyzer_ctx = analyzer_ctx;
182
63
            RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
183
63
            *roaring |= *param.roaring;
184
63
        }
185
33
        segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
186
33
        bitmap_result = result;
187
33
        bitmap_result.mask_out_null();
188
        if constexpr (negative) {
189
            roaring::Roaring full_result;
190
            full_result.addRange(0, num_rows);
191
            bitmap_result.op_not(&full_result);
192
        }
193
33
        return Status::OK();
194
33
    }
_ZNK5doris10FunctionInILb1EE23evaluate_inverted_indexERKSt6vectorINS_21ColumnWithTypeAndNameESaIS3_EERKS2_ISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt10shared_ptrIKNS_9IDataTypeEEESaISJ_EES2_IPNS_10segment_v213IndexIteratorESaISQ_EEjPKNS_24InvertedIndexAnalyzerCtxERNSO_25InvertedIndexResultBitmapE
Line
Count
Source
142
618
            segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
143
618
        DCHECK(data_type_with_names.size() == 1);
144
618
        DCHECK(iterators.size() == 1);
145
618
        auto* iter = iterators[0];
146
618
        auto data_type_with_name = data_type_with_names[0];
147
618
        std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
148
618
        std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
149
150
618
        if (iter == nullptr) {
151
0
            return Status::OK();
152
0
        }
153
618
        if (!segment_v2::IndexReaderHelper::has_string_or_bkd_index(iter)) {
154
            //NOT support in list when parser is FULLTEXT for expr inverted index evaluate.
155
76
            return Status::OK();
156
76
        }
157
543
        if (iter->has_null()) {
158
543
            segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
159
543
            RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
160
543
            null_bitmap = null_bitmap_cache_handle.get_bitmap();
161
543
        }
162
652
        for (const auto& arg : arguments) {
163
652
            Field param_value;
164
652
            arg.column->get(0, param_value);
165
652
            if (param_value.is_null()) {
166
                // predicate like column NOT IN (NULL, '') should not push down to index.
167
505
                if (negative) {
168
505
                    return Status::OK();
169
505
                }
170
0
                *roaring |= *null_bitmap;
171
0
                continue;
172
505
            }
173
147
            InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
174
147
            segment_v2::InvertedIndexParam param;
175
147
            param.column_name = data_type_with_name.first;
176
147
            param.column_type = data_type_with_name.second;
177
147
            param.query_value = param_value;
178
147
            param.query_type = query_type;
179
147
            param.num_rows = num_rows;
180
147
            param.roaring = std::make_shared<roaring::Roaring>();
181
147
            param.analyzer_ctx = analyzer_ctx;
182
147
            RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
183
140
            *roaring |= *param.roaring;
184
140
        }
185
30
        segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
186
30
        bitmap_result = result;
187
30
        bitmap_result.mask_out_null();
188
30
        if constexpr (negative) {
189
30
            roaring::Roaring full_result;
190
30
            full_result.addRange(0, num_rows);
191
30
            bitmap_result.op_not(&full_result);
192
30
        }
193
30
        return Status::OK();
194
542
    }
195
196
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
197
2.32k
                        uint32_t result, size_t input_rows_count) const override {
198
2.32k
        auto* in_state = reinterpret_cast<InState*>(
199
2.32k
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
200
2.32k
        if (!in_state) {
201
0
            return Status::RuntimeError("funciton context for function '{}' must have Set;",
202
0
                                        get_name());
203
0
        }
204
2.32k
        auto res = ColumnUInt8::create();
205
2.32k
        ColumnUInt8::Container& vec_res = res->get_data();
206
2.32k
        vec_res.resize(input_rows_count);
207
208
2.32k
        ColumnUInt8::MutablePtr col_null_map_to;
209
2.32k
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
210
2.32k
        auto& vec_null_map_to = col_null_map_to->get_data();
211
212
2.32k
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
213
2.32k
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
214
215
2.32k
        if (in_state->use_set) {
216
2.32k
            if (materialized_column->is_nullable()) {
217
1.74k
                const auto* null_col_ptr =
218
1.74k
                        assert_cast<const ColumnNullable*>(materialized_column.get());
219
1.74k
                const auto& null_map = null_col_ptr->get_null_map_column().get_data();
220
1.74k
                const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get();
221
222
1.74k
                if (nested_col_ptr->is_column_string()) {
223
1.15k
                    const auto* column_string_ptr =
224
1.15k
                            assert_cast<const ColumnString*>(nested_col_ptr);
225
1.15k
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
226
1.15k
                                               column_string_ptr);
227
1.15k
                } else {
228
                    //TODO: support other column type
229
588
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
230
588
                                               nested_col_ptr);
231
588
                }
232
233
1.74k
                if (!in_state->hybrid_set->contain_null()) {
234
215k
                    for (size_t i = 0; i < input_rows_count; ++i) {
235
214k
                        vec_null_map_to[i] = null_map[i];
236
214k
                    }
237
1.22k
                } else {
238
11.8k
                    for (size_t i = 0; i < input_rows_count; ++i) {
239
11.3k
                        vec_null_map_to[i] = null_map[i] || negative == vec_res[i];
240
11.3k
                    }
241
517
                }
242
243
1.74k
            } else { // non-nullable
244
580
                if (is_string_type(left_arg.type->get_primitive_type())) {
245
297
                    const auto* column_string_ptr =
246
297
                            assert_cast<const ColumnString*>(materialized_column.get());
247
297
                    search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr);
248
297
                } else {
249
283
                    search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get());
250
283
                }
251
252
580
                if (in_state->hybrid_set->contain_null()) {
253
81
                    for (size_t i = 0; i < input_rows_count; ++i) {
254
51
                        vec_null_map_to[i] = negative == vec_res[i];
255
51
                    }
256
30
                }
257
580
            }
258
18.4E
        } else { //!in_state->use_set
259
18.4E
            std::vector<ColumnPtr> set_columns;
260
18.4E
            for (int i = 1; i < arguments.size(); ++i) {
261
2
                set_columns.emplace_back(block.get_by_position(arguments[i]).column);
262
2
            }
263
18.4E
            if (col_const) {
264
0
                impl_without_set<true>(context, set_columns, input_rows_count, vec_res,
265
0
                                       vec_null_map_to, materialized_column);
266
18.4E
            } else {
267
18.4E
                impl_without_set<false>(context, set_columns, input_rows_count, vec_res,
268
18.4E
                                        vec_null_map_to, materialized_column);
269
18.4E
            }
270
18.4E
        }
271
272
2.32k
        if (block.get_by_position(result).type->is_nullable()) {
273
1.77k
            block.replace_by_position(
274
1.77k
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
275
1.77k
        } else {
276
548
            block.replace_by_position(result, std::move(res));
277
548
        }
278
279
2.32k
        return Status::OK();
280
2.32k
    }
_ZNK5doris10FunctionInILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
197
1.53k
                        uint32_t result, size_t input_rows_count) const override {
198
1.53k
        auto* in_state = reinterpret_cast<InState*>(
199
1.53k
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
200
1.53k
        if (!in_state) {
201
0
            return Status::RuntimeError("funciton context for function '{}' must have Set;",
202
0
                                        get_name());
203
0
        }
204
1.53k
        auto res = ColumnUInt8::create();
205
1.53k
        ColumnUInt8::Container& vec_res = res->get_data();
206
1.53k
        vec_res.resize(input_rows_count);
207
208
1.53k
        ColumnUInt8::MutablePtr col_null_map_to;
209
1.53k
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
210
1.53k
        auto& vec_null_map_to = col_null_map_to->get_data();
211
212
1.53k
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
213
1.53k
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
214
215
1.53k
        if (in_state->use_set) {
216
1.53k
            if (materialized_column->is_nullable()) {
217
991
                const auto* null_col_ptr =
218
991
                        assert_cast<const ColumnNullable*>(materialized_column.get());
219
991
                const auto& null_map = null_col_ptr->get_null_map_column().get_data();
220
991
                const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get();
221
222
991
                if (nested_col_ptr->is_column_string()) {
223
510
                    const auto* column_string_ptr =
224
510
                            assert_cast<const ColumnString*>(nested_col_ptr);
225
510
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
226
510
                                               column_string_ptr);
227
510
                } else {
228
                    //TODO: support other column type
229
481
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
230
481
                                               nested_col_ptr);
231
481
                }
232
233
991
                if (!in_state->hybrid_set->contain_null()) {
234
210k
                    for (size_t i = 0; i < input_rows_count; ++i) {
235
209k
                        vec_null_map_to[i] = null_map[i];
236
209k
                    }
237
966
                } else {
238
85
                    for (size_t i = 0; i < input_rows_count; ++i) {
239
60
                        vec_null_map_to[i] = null_map[i] || negative == vec_res[i];
240
60
                    }
241
25
                }
242
243
991
            } else { // non-nullable
244
547
                if (is_string_type(left_arg.type->get_primitive_type())) {
245
297
                    const auto* column_string_ptr =
246
297
                            assert_cast<const ColumnString*>(materialized_column.get());
247
297
                    search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr);
248
297
                } else {
249
250
                    search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get());
250
250
                }
251
252
547
                if (in_state->hybrid_set->contain_null()) {
253
2
                    for (size_t i = 0; i < input_rows_count; ++i) {
254
1
                        vec_null_map_to[i] = negative == vec_res[i];
255
1
                    }
256
1
                }
257
547
            }
258
18.4E
        } else { //!in_state->use_set
259
18.4E
            std::vector<ColumnPtr> set_columns;
260
18.4E
            for (int i = 1; i < arguments.size(); ++i) {
261
2
                set_columns.emplace_back(block.get_by_position(arguments[i]).column);
262
2
            }
263
18.4E
            if (col_const) {
264
0
                impl_without_set<true>(context, set_columns, input_rows_count, vec_res,
265
0
                                       vec_null_map_to, materialized_column);
266
18.4E
            } else {
267
18.4E
                impl_without_set<false>(context, set_columns, input_rows_count, vec_res,
268
18.4E
                                        vec_null_map_to, materialized_column);
269
18.4E
            }
270
18.4E
        }
271
272
1.53k
        if (block.get_by_position(result).type->is_nullable()) {
273
993
            block.replace_by_position(
274
993
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
275
993
        } else {
276
544
            block.replace_by_position(result, std::move(res));
277
544
        }
278
279
1.53k
        return Status::OK();
280
1.53k
    }
_ZNK5doris10FunctionInILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
197
784
                        uint32_t result, size_t input_rows_count) const override {
198
784
        auto* in_state = reinterpret_cast<InState*>(
199
784
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
200
784
        if (!in_state) {
201
0
            return Status::RuntimeError("funciton context for function '{}' must have Set;",
202
0
                                        get_name());
203
0
        }
204
784
        auto res = ColumnUInt8::create();
205
784
        ColumnUInt8::Container& vec_res = res->get_data();
206
784
        vec_res.resize(input_rows_count);
207
208
784
        ColumnUInt8::MutablePtr col_null_map_to;
209
784
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
210
784
        auto& vec_null_map_to = col_null_map_to->get_data();
211
212
784
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
213
784
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
214
215
784
        if (in_state->use_set) {
216
784
            if (materialized_column->is_nullable()) {
217
751
                const auto* null_col_ptr =
218
751
                        assert_cast<const ColumnNullable*>(materialized_column.get());
219
751
                const auto& null_map = null_col_ptr->get_null_map_column().get_data();
220
751
                const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get();
221
222
751
                if (nested_col_ptr->is_column_string()) {
223
644
                    const auto* column_string_ptr =
224
644
                            assert_cast<const ColumnString*>(nested_col_ptr);
225
644
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
226
644
                                               column_string_ptr);
227
644
                } else {
228
                    //TODO: support other column type
229
107
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
230
107
                                               nested_col_ptr);
231
107
                }
232
233
751
                if (!in_state->hybrid_set->contain_null()) {
234
4.91k
                    for (size_t i = 0; i < input_rows_count; ++i) {
235
4.65k
                        vec_null_map_to[i] = null_map[i];
236
4.65k
                    }
237
492
                } else {
238
11.7k
                    for (size_t i = 0; i < input_rows_count; ++i) {
239
11.2k
                        vec_null_map_to[i] = null_map[i] || negative == vec_res[i];
240
11.2k
                    }
241
492
                }
242
243
751
            } else { // non-nullable
244
33
                if (is_string_type(left_arg.type->get_primitive_type())) {
245
0
                    const auto* column_string_ptr =
246
0
                            assert_cast<const ColumnString*>(materialized_column.get());
247
0
                    search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr);
248
33
                } else {
249
33
                    search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get());
250
33
                }
251
252
33
                if (in_state->hybrid_set->contain_null()) {
253
79
                    for (size_t i = 0; i < input_rows_count; ++i) {
254
50
                        vec_null_map_to[i] = negative == vec_res[i];
255
50
                    }
256
29
                }
257
33
            }
258
784
        } else { //!in_state->use_set
259
0
            std::vector<ColumnPtr> set_columns;
260
0
            for (int i = 1; i < arguments.size(); ++i) {
261
0
                set_columns.emplace_back(block.get_by_position(arguments[i]).column);
262
0
            }
263
0
            if (col_const) {
264
0
                impl_without_set<true>(context, set_columns, input_rows_count, vec_res,
265
0
                                       vec_null_map_to, materialized_column);
266
0
            } else {
267
0
                impl_without_set<false>(context, set_columns, input_rows_count, vec_res,
268
0
                                        vec_null_map_to, materialized_column);
269
0
            }
270
0
        }
271
272
784
        if (block.get_by_position(result).type->is_nullable()) {
273
780
            block.replace_by_position(
274
780
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
275
780
        } else {
276
4
            block.replace_by_position(result, std::move(res));
277
4
        }
278
279
784
        return Status::OK();
280
784
    }
281
282
private:
283
    template <typename T>
284
    static void search_hash_set_check_null(InState* in_state, size_t input_rows_count,
285
                                           ColumnUInt8::Container& vec_res,
286
1.74k
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
1.74k
        if constexpr (!negative) {
288
991
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
991
                                                      vec_res);
290
991
        } else {
291
751
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
751
                                                               vec_res);
293
751
        }
294
1.74k
    }
_ZN5doris10FunctionInILb0EE26search_hash_set_check_nullIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSC_PT_
Line
Count
Source
286
510
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
510
        if constexpr (!negative) {
288
510
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
510
                                                      vec_res);
290
        } else {
291
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
                                                               vec_res);
293
        }
294
510
    }
_ZN5doris10FunctionInILb0EE26search_hash_set_check_nullIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSB_PT_
Line
Count
Source
286
481
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
481
        if constexpr (!negative) {
288
481
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
481
                                                      vec_res);
290
        } else {
291
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
                                                               vec_res);
293
        }
294
481
    }
_ZN5doris10FunctionInILb1EE26search_hash_set_check_nullIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSC_PT_
Line
Count
Source
286
644
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
        if constexpr (!negative) {
288
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
                                                      vec_res);
290
644
        } else {
291
644
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
644
                                                               vec_res);
293
644
        }
294
644
    }
_ZN5doris10FunctionInILb1EE26search_hash_set_check_nullIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSB_PT_
Line
Count
Source
286
107
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
        if constexpr (!negative) {
288
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
                                                      vec_res);
290
107
        } else {
291
107
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
107
                                                               vec_res);
293
107
        }
294
107
    }
295
296
    template <typename T>
297
    static void search_hash_set(InState* in_state, size_t input_rows_count,
298
580
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
580
        if constexpr (!negative) {
300
547
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
547
        } else {
302
33
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
33
        }
304
580
    }
_ZN5doris10FunctionInILb0EE15search_hash_setIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
Line
Count
Source
298
297
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
297
        if constexpr (!negative) {
300
297
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
        } else {
302
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
        }
304
297
    }
_ZN5doris10FunctionInILb0EE15search_hash_setIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
Line
Count
Source
298
250
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
250
        if constexpr (!negative) {
300
250
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
        } else {
302
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
        }
304
250
    }
Unexecuted instantiation: _ZN5doris10FunctionInILb1EE15search_hash_setIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
_ZN5doris10FunctionInILb1EE15search_hash_setIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
Line
Count
Source
298
33
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
        if constexpr (!negative) {
300
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
33
        } else {
302
33
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
33
        }
304
33
    }
305
306
    template <bool Const>
307
    static void impl_without_set(FunctionContext* context,
308
                                 const std::vector<ColumnPtr>& set_columns, size_t input_rows_count,
309
                                 ColumnUInt8::Container& vec_res,
310
                                 ColumnUInt8::Container& vec_null_map_to,
311
1
                                 const ColumnPtr& materialized_column) {
312
2
        for (size_t i = 0; i < input_rows_count; ++i) {
313
1
            const auto& ref_data = materialized_column->get_data_at(index_check_const(i, Const));
314
1
            if (ref_data.data == nullptr) {
315
0
                vec_null_map_to[i] = true;
316
0
                continue;
317
0
            }
318
319
1
            std::vector<StringRef> set_datas;
320
            // To comply with the SQL standard, IN() returns NULL not only if the expression on the left hand side is NULL,
321
            // but also if no match is found in the list and one of the expressions in the list is NULL.
322
1
            bool null_in_set = false;
323
324
2
            for (const auto& set_column : set_columns) {
325
2
                auto set_data = set_column->get_data_at(i);
326
2
                if (set_data.data == nullptr) {
327
0
                    null_in_set = true;
328
2
                } else {
329
2
                    set_datas.push_back(set_data);
330
2
                }
331
2
            }
332
1
            std::unique_ptr<HybridSetBase> hybrid_set(create_set(
333
1
                    context->get_arg_type(0)->get_primitive_type(), set_datas.size(), true));
334
2
            for (auto& set_data : set_datas) {
335
2
                hybrid_set->insert((void*)(set_data.data), set_data.size);
336
2
            }
337
338
1
            vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size);
339
1
            if (null_in_set) {
340
0
                vec_null_map_to[i] = negative == vec_res[i];
341
1
            } else {
342
1
                vec_null_map_to[i] = false;
343
1
            }
344
1
        }
345
1
    }
Unexecuted instantiation: _ZN5doris10FunctionInILb0EE16impl_without_setILb1EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
_ZN5doris10FunctionInILb0EE16impl_without_setILb0EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
Line
Count
Source
311
1
                                 const ColumnPtr& materialized_column) {
312
2
        for (size_t i = 0; i < input_rows_count; ++i) {
313
1
            const auto& ref_data = materialized_column->get_data_at(index_check_const(i, Const));
314
1
            if (ref_data.data == nullptr) {
315
0
                vec_null_map_to[i] = true;
316
0
                continue;
317
0
            }
318
319
1
            std::vector<StringRef> set_datas;
320
            // To comply with the SQL standard, IN() returns NULL not only if the expression on the left hand side is NULL,
321
            // but also if no match is found in the list and one of the expressions in the list is NULL.
322
1
            bool null_in_set = false;
323
324
2
            for (const auto& set_column : set_columns) {
325
2
                auto set_data = set_column->get_data_at(i);
326
2
                if (set_data.data == nullptr) {
327
0
                    null_in_set = true;
328
2
                } else {
329
2
                    set_datas.push_back(set_data);
330
2
                }
331
2
            }
332
1
            std::unique_ptr<HybridSetBase> hybrid_set(create_set(
333
1
                    context->get_arg_type(0)->get_primitive_type(), set_datas.size(), true));
334
2
            for (auto& set_data : set_datas) {
335
2
                hybrid_set->insert((void*)(set_data.data), set_data.size);
336
2
            }
337
338
1
            vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size);
339
1
            if (null_in_set) {
340
0
                vec_null_map_to[i] = negative == vec_res[i];
341
1
            } else {
342
1
                vec_null_map_to[i] = false;
343
1
            }
344
1
        }
345
1
    }
Unexecuted instantiation: _ZN5doris10FunctionInILb1EE16impl_without_setILb1EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
Unexecuted instantiation: _ZN5doris10FunctionInILb1EE16impl_without_setILb0EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
346
};
347
348
} // namespace doris