Coverage Report

Created: 2026-05-15 08:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/in.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
// This file is copied from
18
19
#pragma once
20
21
#include <glog/logging.h>
22
23
#include <boost/iterator/iterator_facade.hpp>
24
#include <cstddef>
25
#include <memory>
26
#include <utility>
27
#include <vector>
28
29
#include "common/status.h"
30
#include "core/block/block.h"
31
#include "core/block/column_numbers.h"
32
#include "core/block/column_with_type_and_name.h"
33
#include "core/column/column.h"
34
#include "core/column/column_const.h"
35
#include "core/column/column_nullable.h"
36
#include "core/column/column_vector.h"
37
#include "core/data_type/data_type.h"
38
#include "core/data_type/data_type_nullable.h"
39
#include "core/data_type/data_type_number.h"
40
#include "core/data_type/define_primitive_type.h"
41
#include "core/field.h"
42
#include "core/string_ref.h"
43
#include "core/types.h"
44
#include "exprs/aggregate/aggregate_function.h"
45
#include "exprs/create_predicate_function.h"
46
#include "exprs/function/function.h"
47
#include "exprs/function_context.h"
48
#include "exprs/hybrid_set.h"
49
#include "storage/index/index_reader_helper.h"
50
51
namespace doris {
52
53
template <typename T>
54
class ColumnStr;
55
using ColumnString = ColumnStr<UInt32>;
56
57
struct InState {
58
    bool use_set = true;
59
    std::shared_ptr<HybridSetBase> hybrid_set;
60
};
61
62
template <bool negative>
63
class FunctionIn : public IFunction {
64
public:
65
    static constexpr auto name = negative ? "not_in" : "in";
66
67
1.26k
    static FunctionPtr create() { return std::make_shared<FunctionIn>(); }
_ZN5doris10FunctionInILb0EE6createEv
Line
Count
Source
67
1.08k
    static FunctionPtr create() { return std::make_shared<FunctionIn>(); }
_ZN5doris10FunctionInILb1EE6createEv
Line
Count
Source
67
184
    static FunctionPtr create() { return std::make_shared<FunctionIn>(); }
68
69
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris10FunctionInILb0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris10FunctionInILb1EE8get_nameB5cxx11Ev
70
71
1.24k
    bool is_variadic() const override { return true; }
_ZNK5doris10FunctionInILb0EE11is_variadicEv
Line
Count
Source
71
1.07k
    bool is_variadic() const override { return true; }
_ZNK5doris10FunctionInILb1EE11is_variadicEv
Line
Count
Source
71
176
    bool is_variadic() const override { return true; }
72
73
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris10FunctionInILb0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris10FunctionInILb1EE23get_number_of_argumentsEv
74
75
1.24k
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
76
2.54k
        for (const auto& arg : args) {
77
2.54k
            if (arg->is_nullable()) {
78
742
                return make_nullable(std::make_shared<DataTypeUInt8>());
79
742
            }
80
2.54k
        }
81
504
        return std::make_shared<DataTypeUInt8>();
82
1.24k
    }
_ZNK5doris10FunctionInILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
75
1.07k
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
76
2.26k
        for (const auto& arg : args) {
77
2.26k
            if (arg->is_nullable()) {
78
605
                return make_nullable(std::make_shared<DataTypeUInt8>());
79
605
            }
80
2.26k
        }
81
466
        return std::make_shared<DataTypeUInt8>();
82
1.07k
    }
_ZNK5doris10FunctionInILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
75
175
    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
76
282
        for (const auto& arg : args) {
77
282
            if (arg->is_nullable()) {
78
137
                return make_nullable(std::make_shared<DataTypeUInt8>());
79
137
            }
80
282
        }
81
38
        return std::make_shared<DataTypeUInt8>();
82
175
    }
83
84
3.73k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris10FunctionInILb0EE36use_default_implementation_for_nullsEv
Line
Count
Source
84
2.77k
    bool use_default_implementation_for_nulls() const override { return false; }
_ZNK5doris10FunctionInILb1EE36use_default_implementation_for_nullsEv
Line
Count
Source
84
963
    bool use_default_implementation_for_nulls() const override { return false; }
85
86
    // size of [ in ( 1 , 2  , 3 , null) ]  is 3
87
1.24k
    size_t get_size_with_out_null(FunctionContext* context) {
88
1.24k
        if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) {
89
12
            return context->get_num_args() - 1;
90
12
        }
91
1.22k
        size_t sz = 0;
92
4.13k
        for (int i = 1; i < context->get_num_args(); ++i) {
93
2.90k
            const auto& const_column_ptr = context->get_constant_col(i);
94
2.90k
            if (const_column_ptr != nullptr) {
95
2.90k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
96
2.90k
                if (const_data.data != nullptr) {
97
2.86k
                    sz++;
98
2.86k
                }
99
2.90k
            }
100
2.90k
        }
101
1.22k
        return sz;
102
1.24k
    }
_ZN5doris10FunctionInILb0EE22get_size_with_out_nullEPNS_15FunctionContextE
Line
Count
Source
87
1.06k
    size_t get_size_with_out_null(FunctionContext* context) {
88
1.06k
        if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) {
89
12
            return context->get_num_args() - 1;
90
12
        }
91
1.05k
        size_t sz = 0;
92
3.53k
        for (int i = 1; i < context->get_num_args(); ++i) {
93
2.47k
            const auto& const_column_ptr = context->get_constant_col(i);
94
2.47k
            if (const_column_ptr != nullptr) {
95
2.47k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
96
2.47k
                if (const_data.data != nullptr) {
97
2.45k
                    sz++;
98
2.45k
                }
99
2.47k
            }
100
2.47k
        }
101
1.05k
        return sz;
102
1.06k
    }
_ZN5doris10FunctionInILb1EE22get_size_with_out_nullEPNS_15FunctionContextE
Line
Count
Source
87
173
    size_t get_size_with_out_null(FunctionContext* context) {
88
173
        if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) {
89
0
            return context->get_num_args() - 1;
90
0
        }
91
173
        size_t sz = 0;
92
605
        for (int i = 1; i < context->get_num_args(); ++i) {
93
432
            const auto& const_column_ptr = context->get_constant_col(i);
94
432
            if (const_column_ptr != nullptr) {
95
432
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
96
432
                if (const_data.data != nullptr) {
97
412
                    sz++;
98
412
                }
99
432
            }
100
432
        }
101
173
        return sz;
102
173
    }
103
104
9.57k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
105
9.57k
        if (scope == FunctionContext::THREAD_LOCAL) {
106
8.33k
            return Status::OK();
107
8.33k
        }
108
1.24k
        std::shared_ptr<InState> state = std::make_shared<InState>();
109
1.24k
        context->set_function_state(scope, state);
110
1.24k
        DCHECK(context->get_num_args() >= 1);
111
1.24k
        if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) {
112
0
            state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true));
113
1.24k
        } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR ||
114
1.24k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR ||
115
1.24k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) {
116
            // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
117
316
            state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true));
118
925
        } else {
119
925
            state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
120
925
                                               get_size_with_out_null(context), true));
121
925
        }
122
123
4.30k
        for (int i = 1; i < context->get_num_args(); ++i) {
124
3.06k
            const auto& const_column_ptr = context->get_constant_col(i);
125
3.06k
            if (const_column_ptr != nullptr) {
126
3.06k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
127
3.06k
                state->hybrid_set->insert((void*)const_data.data, const_data.size);
128
3.06k
            } else {
129
1
                state->use_set = false;
130
1
                state->hybrid_set.reset();
131
1
                break;
132
1
            }
133
3.06k
        }
134
1.24k
        return Status::OK();
135
9.57k
    }
_ZN5doris10FunctionInILb0EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE
Line
Count
Source
104
7.32k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
105
7.32k
        if (scope == FunctionContext::THREAD_LOCAL) {
106
6.25k
            return Status::OK();
107
6.25k
        }
108
1.06k
        std::shared_ptr<InState> state = std::make_shared<InState>();
109
1.06k
        context->set_function_state(scope, state);
110
1.06k
        DCHECK(context->get_num_args() >= 1);
111
1.06k
        if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) {
112
0
            state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true));
113
1.06k
        } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR ||
114
1.06k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR ||
115
1.06k
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) {
116
            // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
117
233
            state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true));
118
835
        } else {
119
835
            state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
120
835
                                               get_size_with_out_null(context), true));
121
835
        }
122
123
3.70k
        for (int i = 1; i < context->get_num_args(); ++i) {
124
2.63k
            const auto& const_column_ptr = context->get_constant_col(i);
125
2.63k
            if (const_column_ptr != nullptr) {
126
2.63k
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
127
2.63k
                state->hybrid_set->insert((void*)const_data.data, const_data.size);
128
2.63k
            } else {
129
1
                state->use_set = false;
130
1
                state->hybrid_set.reset();
131
1
                break;
132
1
            }
133
2.63k
        }
134
1.06k
        return Status::OK();
135
7.32k
    }
_ZN5doris10FunctionInILb1EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE
Line
Count
Source
104
2.25k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
105
2.25k
        if (scope == FunctionContext::THREAD_LOCAL) {
106
2.07k
            return Status::OK();
107
2.07k
        }
108
173
        std::shared_ptr<InState> state = std::make_shared<InState>();
109
173
        context->set_function_state(scope, state);
110
173
        DCHECK(context->get_num_args() >= 1);
111
173
        if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) {
112
0
            state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true));
113
173
        } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR ||
114
173
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR ||
115
173
                   context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) {
116
            // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly
117
83
            state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true));
118
90
        } else {
119
90
            state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(),
120
90
                                               get_size_with_out_null(context), true));
121
90
        }
122
123
605
        for (int i = 1; i < context->get_num_args(); ++i) {
124
432
            const auto& const_column_ptr = context->get_constant_col(i);
125
432
            if (const_column_ptr != nullptr) {
126
432
                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
127
432
                state->hybrid_set->insert((void*)const_data.data, const_data.size);
128
432
            } else {
129
0
                state->use_set = false;
130
0
                state->hybrid_set.reset();
131
0
                break;
132
0
            }
133
432
        }
134
173
        return Status::OK();
135
2.25k
    }
136
137
    Status evaluate_inverted_index(
138
            const ColumnsWithTypeAndName& arguments,
139
            const std::vector<IndexFieldNameAndTypePair>& data_type_with_names,
140
            std::vector<segment_v2::IndexIterator*> iterators, uint32_t num_rows,
141
            const InvertedIndexAnalyzerCtx* analyzer_ctx,
142
716
            segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
143
716
        DCHECK(data_type_with_names.size() == 1);
144
716
        DCHECK(iterators.size() == 1);
145
716
        auto* iter = iterators[0];
146
716
        auto data_type_with_name = data_type_with_names[0];
147
716
        std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
148
716
        std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
149
150
716
        if (iter == nullptr) {
151
0
            return Status::OK();
152
0
        }
153
716
        if (!segment_v2::IndexReaderHelper::has_string_or_bkd_index(iter)) {
154
            //NOT support in list when parser is FULLTEXT for expr inverted index evaluate.
155
140
            return Status::OK();
156
140
        }
157
576
        if (iter->has_null()) {
158
576
            segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
159
576
            RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
160
576
            null_bitmap = null_bitmap_cache_handle.get_bitmap();
161
576
        }
162
718
        for (const auto& arg : arguments) {
163
718
            Field param_value;
164
718
            arg.column->get(0, param_value);
165
718
            if (param_value.is_null()) {
166
                // predicate like column NOT IN (NULL, '') should not push down to index.
167
508
                if (negative) {
168
505
                    return Status::OK();
169
505
                }
170
3
                *roaring |= *null_bitmap;
171
3
                continue;
172
508
            }
173
210
            InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
174
210
            segment_v2::InvertedIndexParam param;
175
210
            param.column_name = data_type_with_name.first;
176
210
            param.column_type = data_type_with_name.second;
177
210
            param.query_value = param_value;
178
210
            param.query_type = query_type;
179
210
            param.num_rows = num_rows;
180
210
            param.roaring = std::make_shared<roaring::Roaring>();
181
210
            param.analyzer_ctx = analyzer_ctx;
182
210
            RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
183
203
            *roaring |= *param.roaring;
184
203
        }
185
64
        segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
186
64
        bitmap_result = result;
187
64
        bitmap_result.mask_out_null();
188
64
        if constexpr (negative) {
189
31
            roaring::Roaring full_result;
190
31
            full_result.addRange(0, num_rows);
191
31
            bitmap_result.op_not(&full_result);
192
31
        }
193
64
        return Status::OK();
194
576
    }
_ZNK5doris10FunctionInILb0EE23evaluate_inverted_indexERKSt6vectorINS_21ColumnWithTypeAndNameESaIS3_EERKS2_ISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt10shared_ptrIKNS_9IDataTypeEEESaISJ_EES2_IPNS_10segment_v213IndexIteratorESaISQ_EEjPKNS_24InvertedIndexAnalyzerCtxERNSO_25InvertedIndexResultBitmapE
Line
Count
Source
142
97
            segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
143
97
        DCHECK(data_type_with_names.size() == 1);
144
97
        DCHECK(iterators.size() == 1);
145
97
        auto* iter = iterators[0];
146
97
        auto data_type_with_name = data_type_with_names[0];
147
97
        std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
148
97
        std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
149
150
97
        if (iter == nullptr) {
151
0
            return Status::OK();
152
0
        }
153
97
        if (!segment_v2::IndexReaderHelper::has_string_or_bkd_index(iter)) {
154
            //NOT support in list when parser is FULLTEXT for expr inverted index evaluate.
155
64
            return Status::OK();
156
64
        }
157
33
        if (iter->has_null()) {
158
33
            segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
159
33
            RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
160
33
            null_bitmap = null_bitmap_cache_handle.get_bitmap();
161
33
        }
162
66
        for (const auto& arg : arguments) {
163
66
            Field param_value;
164
66
            arg.column->get(0, param_value);
165
66
            if (param_value.is_null()) {
166
                // predicate like column NOT IN (NULL, '') should not push down to index.
167
3
                if (negative) {
168
0
                    return Status::OK();
169
0
                }
170
3
                *roaring |= *null_bitmap;
171
3
                continue;
172
3
            }
173
63
            InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
174
63
            segment_v2::InvertedIndexParam param;
175
63
            param.column_name = data_type_with_name.first;
176
63
            param.column_type = data_type_with_name.second;
177
63
            param.query_value = param_value;
178
63
            param.query_type = query_type;
179
63
            param.num_rows = num_rows;
180
63
            param.roaring = std::make_shared<roaring::Roaring>();
181
63
            param.analyzer_ctx = analyzer_ctx;
182
63
            RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
183
63
            *roaring |= *param.roaring;
184
63
        }
185
33
        segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
186
33
        bitmap_result = result;
187
33
        bitmap_result.mask_out_null();
188
        if constexpr (negative) {
189
            roaring::Roaring full_result;
190
            full_result.addRange(0, num_rows);
191
            bitmap_result.op_not(&full_result);
192
        }
193
33
        return Status::OK();
194
33
    }
_ZNK5doris10FunctionInILb1EE23evaluate_inverted_indexERKSt6vectorINS_21ColumnWithTypeAndNameESaIS3_EERKS2_ISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt10shared_ptrIKNS_9IDataTypeEEESaISJ_EES2_IPNS_10segment_v213IndexIteratorESaISQ_EEjPKNS_24InvertedIndexAnalyzerCtxERNSO_25InvertedIndexResultBitmapE
Line
Count
Source
142
619
            segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
143
619
        DCHECK(data_type_with_names.size() == 1);
144
619
        DCHECK(iterators.size() == 1);
145
619
        auto* iter = iterators[0];
146
619
        auto data_type_with_name = data_type_with_names[0];
147
619
        std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
148
619
        std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
149
150
619
        if (iter == nullptr) {
151
0
            return Status::OK();
152
0
        }
153
619
        if (!segment_v2::IndexReaderHelper::has_string_or_bkd_index(iter)) {
154
            //NOT support in list when parser is FULLTEXT for expr inverted index evaluate.
155
76
            return Status::OK();
156
76
        }
157
543
        if (iter->has_null()) {
158
543
            segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
159
543
            RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
160
543
            null_bitmap = null_bitmap_cache_handle.get_bitmap();
161
543
        }
162
652
        for (const auto& arg : arguments) {
163
652
            Field param_value;
164
652
            arg.column->get(0, param_value);
165
652
            if (param_value.is_null()) {
166
                // predicate like column NOT IN (NULL, '') should not push down to index.
167
505
                if (negative) {
168
505
                    return Status::OK();
169
505
                }
170
0
                *roaring |= *null_bitmap;
171
0
                continue;
172
505
            }
173
147
            InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
174
147
            segment_v2::InvertedIndexParam param;
175
147
            param.column_name = data_type_with_name.first;
176
147
            param.column_type = data_type_with_name.second;
177
147
            param.query_value = param_value;
178
147
            param.query_type = query_type;
179
147
            param.num_rows = num_rows;
180
147
            param.roaring = std::make_shared<roaring::Roaring>();
181
147
            param.analyzer_ctx = analyzer_ctx;
182
147
            RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param}));
183
140
            *roaring |= *param.roaring;
184
140
        }
185
31
        segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
186
31
        bitmap_result = result;
187
31
        bitmap_result.mask_out_null();
188
31
        if constexpr (negative) {
189
31
            roaring::Roaring full_result;
190
31
            full_result.addRange(0, num_rows);
191
31
            bitmap_result.op_not(&full_result);
192
31
        }
193
31
        return Status::OK();
194
543
    }
195
196
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
197
2.48k
                        uint32_t result, size_t input_rows_count) const override {
198
2.48k
        auto* in_state = reinterpret_cast<InState*>(
199
2.48k
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
200
2.48k
        if (!in_state) {
201
0
            return Status::RuntimeError("funciton context for function '{}' must have Set;",
202
0
                                        get_name());
203
0
        }
204
2.48k
        auto res = ColumnUInt8::create();
205
2.48k
        ColumnUInt8::Container& vec_res = res->get_data();
206
2.48k
        vec_res.resize(input_rows_count);
207
208
2.48k
        ColumnUInt8::MutablePtr col_null_map_to;
209
2.48k
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
210
2.48k
        auto& vec_null_map_to = col_null_map_to->get_data();
211
212
2.48k
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
213
2.48k
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
214
215
2.48k
        if (in_state->use_set) {
216
2.48k
            if (materialized_column->is_nullable()) {
217
1.74k
                const auto* null_col_ptr =
218
1.74k
                        assert_cast<const ColumnNullable*>(materialized_column.get());
219
1.74k
                const auto& null_map = null_col_ptr->get_null_map_column().get_data();
220
1.74k
                const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get();
221
222
1.74k
                if (nested_col_ptr->is_column_string()) {
223
1.16k
                    const auto* column_string_ptr =
224
1.16k
                            assert_cast<const ColumnString*>(nested_col_ptr);
225
1.16k
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
226
1.16k
                                               column_string_ptr);
227
1.16k
                } else {
228
                    //TODO: support other column type
229
580
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
230
580
                                               nested_col_ptr);
231
580
                }
232
233
1.74k
                if (!in_state->hybrid_set->contain_null()) {
234
214k
                    for (size_t i = 0; i < input_rows_count; ++i) {
235
212k
                        vec_null_map_to[i] = null_map[i];
236
212k
                    }
237
1.22k
                } else {
238
11.8k
                    for (size_t i = 0; i < input_rows_count; ++i) {
239
11.3k
                        vec_null_map_to[i] = null_map[i] || negative == vec_res[i];
240
11.3k
                    }
241
517
                }
242
243
1.74k
            } else { // non-nullable
244
747
                if (is_string_type(left_arg.type->get_primitive_type())) {
245
499
                    const auto* column_string_ptr =
246
499
                            assert_cast<const ColumnString*>(materialized_column.get());
247
499
                    search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr);
248
499
                } else {
249
248
                    search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get());
250
248
                }
251
252
747
                if (in_state->hybrid_set->contain_null()) {
253
81
                    for (size_t i = 0; i < input_rows_count; ++i) {
254
51
                        vec_null_map_to[i] = negative == vec_res[i];
255
51
                    }
256
30
                }
257
747
            }
258
2.48k
        } else { //!in_state->use_set
259
1
            std::vector<ColumnPtr> set_columns;
260
3
            for (int i = 1; i < arguments.size(); ++i) {
261
2
                set_columns.emplace_back(block.get_by_position(arguments[i]).column);
262
2
            }
263
1
            if (col_const) {
264
0
                impl_without_set<true>(context, set_columns, input_rows_count, vec_res,
265
0
                                       vec_null_map_to, materialized_column);
266
1
            } else {
267
1
                impl_without_set<false>(context, set_columns, input_rows_count, vec_res,
268
1
                                        vec_null_map_to, materialized_column);
269
1
            }
270
1
        }
271
272
2.48k
        if (block.get_by_position(result).type->is_nullable()) {
273
1.77k
            block.replace_by_position(
274
1.77k
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
275
1.77k
        } else {
276
717
            block.replace_by_position(result, std::move(res));
277
717
        }
278
279
2.48k
        return Status::OK();
280
2.48k
    }
_ZNK5doris10FunctionInILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
197
1.70k
                        uint32_t result, size_t input_rows_count) const override {
198
1.70k
        auto* in_state = reinterpret_cast<InState*>(
199
1.70k
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
200
1.70k
        if (!in_state) {
201
0
            return Status::RuntimeError("funciton context for function '{}' must have Set;",
202
0
                                        get_name());
203
0
        }
204
1.70k
        auto res = ColumnUInt8::create();
205
1.70k
        ColumnUInt8::Container& vec_res = res->get_data();
206
1.70k
        vec_res.resize(input_rows_count);
207
208
1.70k
        ColumnUInt8::MutablePtr col_null_map_to;
209
1.70k
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
210
1.70k
        auto& vec_null_map_to = col_null_map_to->get_data();
211
212
1.70k
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
213
1.70k
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
214
215
1.70k
        if (in_state->use_set) {
216
1.69k
            if (materialized_column->is_nullable()) {
217
985
                const auto* null_col_ptr =
218
985
                        assert_cast<const ColumnNullable*>(materialized_column.get());
219
985
                const auto& null_map = null_col_ptr->get_null_map_column().get_data();
220
985
                const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get();
221
222
985
                if (nested_col_ptr->is_column_string()) {
223
512
                    const auto* column_string_ptr =
224
512
                            assert_cast<const ColumnString*>(nested_col_ptr);
225
512
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
226
512
                                               column_string_ptr);
227
512
                } else {
228
                    //TODO: support other column type
229
473
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
230
473
                                               nested_col_ptr);
231
473
                }
232
233
985
                if (!in_state->hybrid_set->contain_null()) {
234
209k
                    for (size_t i = 0; i < input_rows_count; ++i) {
235
208k
                        vec_null_map_to[i] = null_map[i];
236
208k
                    }
237
960
                } else {
238
85
                    for (size_t i = 0; i < input_rows_count; ++i) {
239
60
                        vec_null_map_to[i] = null_map[i] || negative == vec_res[i];
240
60
                    }
241
25
                }
242
243
985
            } else { // non-nullable
244
714
                if (is_string_type(left_arg.type->get_primitive_type())) {
245
499
                    const auto* column_string_ptr =
246
499
                            assert_cast<const ColumnString*>(materialized_column.get());
247
499
                    search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr);
248
499
                } else {
249
215
                    search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get());
250
215
                }
251
252
714
                if (in_state->hybrid_set->contain_null()) {
253
2
                    for (size_t i = 0; i < input_rows_count; ++i) {
254
1
                        vec_null_map_to[i] = negative == vec_res[i];
255
1
                    }
256
1
                }
257
714
            }
258
1.69k
        } else { //!in_state->use_set
259
1
            std::vector<ColumnPtr> set_columns;
260
3
            for (int i = 1; i < arguments.size(); ++i) {
261
2
                set_columns.emplace_back(block.get_by_position(arguments[i]).column);
262
2
            }
263
1
            if (col_const) {
264
0
                impl_without_set<true>(context, set_columns, input_rows_count, vec_res,
265
0
                                       vec_null_map_to, materialized_column);
266
1
            } else {
267
1
                impl_without_set<false>(context, set_columns, input_rows_count, vec_res,
268
1
                                        vec_null_map_to, materialized_column);
269
1
            }
270
1
        }
271
272
1.70k
        if (block.get_by_position(result).type->is_nullable()) {
273
987
            block.replace_by_position(
274
987
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
275
987
        } else {
276
713
            block.replace_by_position(result, std::move(res));
277
713
        }
278
279
1.70k
        return Status::OK();
280
1.70k
    }
_ZNK5doris10FunctionInILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
197
788
                        uint32_t result, size_t input_rows_count) const override {
198
788
        auto* in_state = reinterpret_cast<InState*>(
199
788
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
200
788
        if (!in_state) {
201
0
            return Status::RuntimeError("funciton context for function '{}' must have Set;",
202
0
                                        get_name());
203
0
        }
204
788
        auto res = ColumnUInt8::create();
205
788
        ColumnUInt8::Container& vec_res = res->get_data();
206
788
        vec_res.resize(input_rows_count);
207
208
788
        ColumnUInt8::MutablePtr col_null_map_to;
209
788
        col_null_map_to = ColumnUInt8::create(input_rows_count, false);
210
788
        auto& vec_null_map_to = col_null_map_to->get_data();
211
212
788
        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
213
788
        const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column);
214
215
788
        if (in_state->use_set) {
216
788
            if (materialized_column->is_nullable()) {
217
755
                const auto* null_col_ptr =
218
755
                        assert_cast<const ColumnNullable*>(materialized_column.get());
219
755
                const auto& null_map = null_col_ptr->get_null_map_column().get_data();
220
755
                const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get();
221
222
755
                if (nested_col_ptr->is_column_string()) {
223
648
                    const auto* column_string_ptr =
224
648
                            assert_cast<const ColumnString*>(nested_col_ptr);
225
648
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
226
648
                                               column_string_ptr);
227
648
                } else {
228
                    //TODO: support other column type
229
107
                    search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map,
230
107
                                               nested_col_ptr);
231
107
                }
232
233
755
                if (!in_state->hybrid_set->contain_null()) {
234
4.92k
                    for (size_t i = 0; i < input_rows_count; ++i) {
235
4.65k
                        vec_null_map_to[i] = null_map[i];
236
4.65k
                    }
237
492
                } else {
238
11.7k
                    for (size_t i = 0; i < input_rows_count; ++i) {
239
11.2k
                        vec_null_map_to[i] = null_map[i] || negative == vec_res[i];
240
11.2k
                    }
241
492
                }
242
243
755
            } else { // non-nullable
244
33
                if (is_string_type(left_arg.type->get_primitive_type())) {
245
0
                    const auto* column_string_ptr =
246
0
                            assert_cast<const ColumnString*>(materialized_column.get());
247
0
                    search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr);
248
33
                } else {
249
33
                    search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get());
250
33
                }
251
252
33
                if (in_state->hybrid_set->contain_null()) {
253
79
                    for (size_t i = 0; i < input_rows_count; ++i) {
254
50
                        vec_null_map_to[i] = negative == vec_res[i];
255
50
                    }
256
29
                }
257
33
            }
258
788
        } else { //!in_state->use_set
259
0
            std::vector<ColumnPtr> set_columns;
260
0
            for (int i = 1; i < arguments.size(); ++i) {
261
0
                set_columns.emplace_back(block.get_by_position(arguments[i]).column);
262
0
            }
263
0
            if (col_const) {
264
0
                impl_without_set<true>(context, set_columns, input_rows_count, vec_res,
265
0
                                       vec_null_map_to, materialized_column);
266
0
            } else {
267
0
                impl_without_set<false>(context, set_columns, input_rows_count, vec_res,
268
0
                                        vec_null_map_to, materialized_column);
269
0
            }
270
0
        }
271
272
788
        if (block.get_by_position(result).type->is_nullable()) {
273
784
            block.replace_by_position(
274
784
                    result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
275
784
        } else {
276
4
            block.replace_by_position(result, std::move(res));
277
4
        }
278
279
788
        return Status::OK();
280
788
    }
281
282
private:
283
    template <typename T>
284
    static void search_hash_set_check_null(InState* in_state, size_t input_rows_count,
285
                                           ColumnUInt8::Container& vec_res,
286
1.74k
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
1.74k
        if constexpr (!negative) {
288
985
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
985
                                                      vec_res);
290
985
        } else {
291
755
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
755
                                                               vec_res);
293
755
        }
294
1.74k
    }
_ZN5doris10FunctionInILb0EE26search_hash_set_check_nullIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSC_PT_
Line
Count
Source
286
512
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
512
        if constexpr (!negative) {
288
512
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
512
                                                      vec_res);
290
        } else {
291
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
                                                               vec_res);
293
        }
294
512
    }
_ZN5doris10FunctionInILb0EE26search_hash_set_check_nullIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSB_PT_
Line
Count
Source
286
473
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
473
        if constexpr (!negative) {
288
473
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
473
                                                      vec_res);
290
        } else {
291
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
                                                               vec_res);
293
        }
294
473
    }
_ZN5doris10FunctionInILb1EE26search_hash_set_check_nullIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSC_PT_
Line
Count
Source
286
648
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
        if constexpr (!negative) {
288
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
                                                      vec_res);
290
648
        } else {
291
648
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
648
                                                               vec_res);
293
648
        }
294
648
    }
_ZN5doris10FunctionInILb1EE26search_hash_set_check_nullIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSB_PT_
Line
Count
Source
286
107
                                           const ColumnUInt8::Container& null_map, T* col_ptr) {
287
        if constexpr (!negative) {
288
            in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map,
289
                                                      vec_res);
290
107
        } else {
291
107
            in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map,
292
107
                                                               vec_res);
293
107
        }
294
107
    }
295
296
    template <typename T>
297
    static void search_hash_set(InState* in_state, size_t input_rows_count,
298
747
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
747
        if constexpr (!negative) {
300
714
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
714
        } else {
302
33
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
33
        }
304
747
    }
_ZN5doris10FunctionInILb0EE15search_hash_setIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
Line
Count
Source
298
499
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
499
        if constexpr (!negative) {
300
499
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
        } else {
302
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
        }
304
499
    }
_ZN5doris10FunctionInILb0EE15search_hash_setIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
Line
Count
Source
298
215
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
215
        if constexpr (!negative) {
300
215
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
        } else {
302
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
        }
304
215
    }
Unexecuted instantiation: _ZN5doris10FunctionInILb1EE15search_hash_setIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
_ZN5doris10FunctionInILb1EE15search_hash_setIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_
Line
Count
Source
298
33
                                ColumnUInt8::Container& vec_res, T* col_ptr) {
299
        if constexpr (!negative) {
300
            in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res);
301
33
        } else {
302
33
            in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res);
303
33
        }
304
33
    }
305
306
    template <bool Const>
307
    static void impl_without_set(FunctionContext* context,
308
                                 const std::vector<ColumnPtr>& set_columns, size_t input_rows_count,
309
                                 ColumnUInt8::Container& vec_res,
310
                                 ColumnUInt8::Container& vec_null_map_to,
311
1
                                 const ColumnPtr& materialized_column) {
312
2
        for (size_t i = 0; i < input_rows_count; ++i) {
313
1
            const auto& ref_data = materialized_column->get_data_at(index_check_const(i, Const));
314
1
            if (ref_data.data == nullptr) {
315
0
                vec_null_map_to[i] = true;
316
0
                continue;
317
0
            }
318
319
1
            std::vector<StringRef> set_datas;
320
            // To comply with the SQL standard, IN() returns NULL not only if the expression on the left hand side is NULL,
321
            // but also if no match is found in the list and one of the expressions in the list is NULL.
322
1
            bool null_in_set = false;
323
324
2
            for (const auto& set_column : set_columns) {
325
2
                auto set_data = set_column->get_data_at(i);
326
2
                if (set_data.data == nullptr) {
327
0
                    null_in_set = true;
328
2
                } else {
329
2
                    set_datas.push_back(set_data);
330
2
                }
331
2
            }
332
1
            std::unique_ptr<HybridSetBase> hybrid_set(create_set(
333
1
                    context->get_arg_type(0)->get_primitive_type(), set_datas.size(), true));
334
2
            for (auto& set_data : set_datas) {
335
2
                hybrid_set->insert((void*)(set_data.data), set_data.size);
336
2
            }
337
338
1
            vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size);
339
1
            if (null_in_set) {
340
0
                vec_null_map_to[i] = negative == vec_res[i];
341
1
            } else {
342
1
                vec_null_map_to[i] = false;
343
1
            }
344
1
        }
345
1
    }
Unexecuted instantiation: _ZN5doris10FunctionInILb0EE16impl_without_setILb1EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
_ZN5doris10FunctionInILb0EE16impl_without_setILb0EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
Line
Count
Source
311
1
                                 const ColumnPtr& materialized_column) {
312
2
        for (size_t i = 0; i < input_rows_count; ++i) {
313
1
            const auto& ref_data = materialized_column->get_data_at(index_check_const(i, Const));
314
1
            if (ref_data.data == nullptr) {
315
0
                vec_null_map_to[i] = true;
316
0
                continue;
317
0
            }
318
319
1
            std::vector<StringRef> set_datas;
320
            // To comply with the SQL standard, IN() returns NULL not only if the expression on the left hand side is NULL,
321
            // but also if no match is found in the list and one of the expressions in the list is NULL.
322
1
            bool null_in_set = false;
323
324
2
            for (const auto& set_column : set_columns) {
325
2
                auto set_data = set_column->get_data_at(i);
326
2
                if (set_data.data == nullptr) {
327
0
                    null_in_set = true;
328
2
                } else {
329
2
                    set_datas.push_back(set_data);
330
2
                }
331
2
            }
332
1
            std::unique_ptr<HybridSetBase> hybrid_set(create_set(
333
1
                    context->get_arg_type(0)->get_primitive_type(), set_datas.size(), true));
334
2
            for (auto& set_data : set_datas) {
335
2
                hybrid_set->insert((void*)(set_data.data), set_data.size);
336
2
            }
337
338
1
            vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size);
339
1
            if (null_in_set) {
340
0
                vec_null_map_to[i] = negative == vec_res[i];
341
1
            } else {
342
1
                vec_null_map_to[i] = false;
343
1
            }
344
1
        }
345
1
    }
Unexecuted instantiation: _ZN5doris10FunctionInILb1EE16impl_without_setILb1EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
Unexecuted instantiation: _ZN5doris10FunctionInILb1EE16impl_without_setILb0EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_
346
};
347
348
} // namespace doris