be/src/exprs/function/in.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | |
19 | | #pragma once |
20 | | |
21 | | #include <glog/logging.h> |
22 | | |
23 | | #include <boost/iterator/iterator_facade.hpp> |
24 | | #include <cstddef> |
25 | | #include <memory> |
26 | | #include <utility> |
27 | | #include <vector> |
28 | | |
29 | | #include "common/status.h" |
30 | | #include "core/block/block.h" |
31 | | #include "core/block/column_numbers.h" |
32 | | #include "core/block/column_with_type_and_name.h" |
33 | | #include "core/column/column.h" |
34 | | #include "core/column/column_const.h" |
35 | | #include "core/column/column_nullable.h" |
36 | | #include "core/column/column_vector.h" |
37 | | #include "core/data_type/data_type.h" |
38 | | #include "core/data_type/data_type_nullable.h" |
39 | | #include "core/data_type/data_type_number.h" |
40 | | #include "core/data_type/define_primitive_type.h" |
41 | | #include "core/field.h" |
42 | | #include "core/string_ref.h" |
43 | | #include "core/types.h" |
44 | | #include "exprs/aggregate/aggregate_function.h" |
45 | | #include "exprs/create_predicate_function.h" |
46 | | #include "exprs/function/function.h" |
47 | | #include "exprs/function_context.h" |
48 | | #include "exprs/hybrid_set.h" |
49 | | #include "storage/index/index_reader_helper.h" |
50 | | |
51 | | namespace doris { |
52 | | |
53 | | template <typename T> |
54 | | class ColumnStr; |
55 | | using ColumnString = ColumnStr<UInt32>; |
56 | | |
57 | | struct InState { |
58 | | bool use_set = true; |
59 | | std::shared_ptr<HybridSetBase> hybrid_set; |
60 | | }; |
61 | | |
62 | | template <bool negative> |
63 | | class FunctionIn : public IFunction { |
64 | | public: |
65 | | static constexpr auto name = negative ? "not_in" : "in"; |
66 | | |
67 | 693 | static FunctionPtr create() { return std::make_shared<FunctionIn>(); }_ZN5doris10FunctionInILb0EE6createEv Line | Count | Source | 67 | 675 | static FunctionPtr create() { return std::make_shared<FunctionIn>(); } |
_ZN5doris10FunctionInILb1EE6createEv Line | Count | Source | 67 | 18 | static FunctionPtr create() { return std::make_shared<FunctionIn>(); } |
|
68 | | |
69 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris10FunctionInILb0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris10FunctionInILb1EE8get_nameB5cxx11Ev |
70 | | |
71 | 679 | bool is_variadic() const override { return true; }_ZNK5doris10FunctionInILb0EE11is_variadicEv Line | Count | Source | 71 | 668 | bool is_variadic() const override { return true; } |
_ZNK5doris10FunctionInILb1EE11is_variadicEv Line | Count | Source | 71 | 11 | bool is_variadic() const override { return true; } |
|
72 | | |
73 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris10FunctionInILb0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris10FunctionInILb1EE23get_number_of_argumentsEv |
74 | | |
75 | 677 | DataTypePtr get_return_type_impl(const DataTypes& args) const override { |
76 | 782 | for (const auto& arg : args) { |
77 | 782 | if (arg->is_nullable()) { |
78 | 665 | return make_nullable(std::make_shared<DataTypeUInt8>()); |
79 | 665 | } |
80 | 782 | } |
81 | 12 | return std::make_shared<DataTypeUInt8>(); |
82 | 677 | } _ZNK5doris10FunctionInILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 75 | 667 | DataTypePtr get_return_type_impl(const DataTypes& args) const override { | 76 | 771 | for (const auto& arg : args) { | 77 | 771 | if (arg->is_nullable()) { | 78 | 655 | return make_nullable(std::make_shared<DataTypeUInt8>()); | 79 | 655 | } | 80 | 771 | } | 81 | 12 | return std::make_shared<DataTypeUInt8>(); | 82 | 667 | } |
_ZNK5doris10FunctionInILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 75 | 10 | DataTypePtr get_return_type_impl(const DataTypes& args) const override { | 76 | 11 | for (const auto& arg : args) { | 77 | 11 | if (arg->is_nullable()) { | 78 | 10 | return make_nullable(std::make_shared<DataTypeUInt8>()); | 79 | 10 | } | 80 | 11 | } | 81 | 0 | return std::make_shared<DataTypeUInt8>(); | 82 | 10 | } |
|
83 | | |
84 | 16.5k | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris10FunctionInILb0EE36use_default_implementation_for_nullsEv Line | Count | Source | 84 | 16.5k | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris10FunctionInILb1EE36use_default_implementation_for_nullsEv Line | Count | Source | 84 | 28 | bool use_default_implementation_for_nulls() const override { return false; } |
|
85 | | |
86 | | // size of [ in ( 1 , 2 , 3 , null) ] is 3 |
87 | 672 | size_t get_size_with_out_null(FunctionContext* context) { |
88 | 672 | if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) { |
89 | 4 | return context->get_num_args() - 1; |
90 | 4 | } |
91 | 668 | size_t sz = 0; |
92 | 2.20k | for (int i = 1; i < context->get_num_args(); ++i) { |
93 | 1.54k | const auto& const_column_ptr = context->get_constant_col(i); |
94 | 1.54k | if (const_column_ptr != nullptr) { |
95 | 1.54k | auto const_data = const_column_ptr->column_ptr->get_data_at(0); |
96 | 1.54k | if (const_data.data != nullptr) { |
97 | 1.52k | sz++; |
98 | 1.52k | } |
99 | 1.54k | } |
100 | 1.54k | } |
101 | 668 | return sz; |
102 | 672 | } _ZN5doris10FunctionInILb0EE22get_size_with_out_nullEPNS_15FunctionContextE Line | Count | Source | 87 | 664 | size_t get_size_with_out_null(FunctionContext* context) { | 88 | 664 | if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) { | 89 | 4 | return context->get_num_args() - 1; | 90 | 4 | } | 91 | 660 | size_t sz = 0; | 92 | 2.18k | for (int i = 1; i < context->get_num_args(); ++i) { | 93 | 1.52k | const auto& const_column_ptr = context->get_constant_col(i); | 94 | 1.52k | if (const_column_ptr != nullptr) { | 95 | 1.52k | auto const_data = const_column_ptr->column_ptr->get_data_at(0); | 96 | 1.52k | if (const_data.data != nullptr) { | 97 | 1.51k | sz++; | 98 | 1.51k | } | 99 | 1.52k | } | 100 | 1.52k | } | 101 | 660 | return sz; | 102 | 664 | } |
_ZN5doris10FunctionInILb1EE22get_size_with_out_nullEPNS_15FunctionContextE Line | Count | Source | 87 | 8 | size_t get_size_with_out_null(FunctionContext* context) { | 88 | 8 | if ((context->get_num_args() - 1) > FIXED_CONTAINER_MAX_SIZE) { | 89 | 0 | return context->get_num_args() - 1; | 90 | 0 | } | 91 | 8 | size_t sz = 0; | 92 | 26 | for (int i = 1; i < context->get_num_args(); ++i) { | 93 | 18 | const auto& const_column_ptr = context->get_constant_col(i); | 94 | 18 | if (const_column_ptr != nullptr) { | 95 | 18 | auto const_data = const_column_ptr->column_ptr->get_data_at(0); | 96 | 18 | if (const_data.data != nullptr) { | 97 | 18 | sz++; | 98 | 18 | } | 99 | 18 | } | 100 | 18 | } | 101 | 8 | return sz; | 102 | 8 | } |
|
103 | | |
104 | 4.13k | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
105 | 4.13k | if (scope == FunctionContext::THREAD_LOCAL) { |
106 | 3.46k | return Status::OK(); |
107 | 3.46k | } |
108 | 674 | std::shared_ptr<InState> state = std::make_shared<InState>(); |
109 | 674 | context->set_function_state(scope, state); |
110 | 674 | DCHECK(context->get_num_args() >= 1); |
111 | 674 | if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) { |
112 | 0 | state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true)); |
113 | 674 | } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR || |
114 | 674 | context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR || |
115 | 674 | context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) { |
116 | | // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly |
117 | 194 | state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true)); |
118 | 480 | } else { |
119 | 480 | state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(), |
120 | 480 | get_size_with_out_null(context), true)); |
121 | 480 | } |
122 | | |
123 | 2.29k | for (int i = 1; i < context->get_num_args(); ++i) { |
124 | 1.62k | const auto& const_column_ptr = context->get_constant_col(i); |
125 | 1.62k | if (const_column_ptr != nullptr) { |
126 | 1.62k | auto const_data = const_column_ptr->column_ptr->get_data_at(0); |
127 | 1.62k | state->hybrid_set->insert((void*)const_data.data, const_data.size); |
128 | 1.62k | } else { |
129 | 0 | state->use_set = false; |
130 | 0 | state->hybrid_set.reset(); |
131 | 0 | break; |
132 | 0 | } |
133 | 1.62k | } |
134 | 674 | return Status::OK(); |
135 | 4.13k | } _ZN5doris10FunctionInILb0EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE Line | Count | Source | 104 | 4.07k | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 105 | 4.07k | if (scope == FunctionContext::THREAD_LOCAL) { | 106 | 3.40k | return Status::OK(); | 107 | 3.40k | } | 108 | 666 | std::shared_ptr<InState> state = std::make_shared<InState>(); | 109 | 666 | context->set_function_state(scope, state); | 110 | 666 | DCHECK(context->get_num_args() >= 1); | 111 | 666 | if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) { | 112 | 0 | state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true)); | 113 | 666 | } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR || | 114 | 666 | context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR || | 115 | 666 | context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) { | 116 | | // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly | 117 | 190 | state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true)); | 118 | 476 | } else { | 119 | 476 | state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(), | 120 | 476 | get_size_with_out_null(context), true)); | 121 | 476 | } | 122 | | | 123 | 2.27k | for (int i = 1; i < context->get_num_args(); ++i) { | 124 | 1.60k | const auto& const_column_ptr = context->get_constant_col(i); | 125 | 1.60k | if (const_column_ptr != nullptr) { | 126 | 1.60k | auto const_data = const_column_ptr->column_ptr->get_data_at(0); | 127 | 1.60k | state->hybrid_set->insert((void*)const_data.data, const_data.size); | 128 | 1.60k | } else { | 129 | 0 | state->use_set = false; | 130 | 0 | state->hybrid_set.reset(); | 131 | 0 | break; | 132 | 0 | } | 133 | 1.60k | } | 134 | 666 | return Status::OK(); | 135 | 4.07k | } |
_ZN5doris10FunctionInILb1EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE Line | Count | Source | 104 | 64 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 105 | 64 | if (scope == FunctionContext::THREAD_LOCAL) { | 106 | 56 | return Status::OK(); | 107 | 56 | } | 108 | 8 | std::shared_ptr<InState> state = std::make_shared<InState>(); | 109 | 8 | context->set_function_state(scope, state); | 110 | 8 | DCHECK(context->get_num_args() >= 1); | 111 | 8 | if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_NULL) { | 112 | 0 | state->hybrid_set.reset(create_set(TYPE_BOOLEAN, 0, true)); | 113 | 8 | } else if (context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_CHAR || | 114 | 8 | context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_VARCHAR || | 115 | 8 | context->get_arg_type(0)->get_primitive_type() == PrimitiveType::TYPE_STRING) { | 116 | | // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly | 117 | 4 | state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context), true)); | 118 | 4 | } else { | 119 | 4 | state->hybrid_set.reset(create_set(context->get_arg_type(0)->get_primitive_type(), | 120 | 4 | get_size_with_out_null(context), true)); | 121 | 4 | } | 122 | | | 123 | 26 | for (int i = 1; i < context->get_num_args(); ++i) { | 124 | 18 | const auto& const_column_ptr = context->get_constant_col(i); | 125 | 18 | if (const_column_ptr != nullptr) { | 126 | 18 | auto const_data = const_column_ptr->column_ptr->get_data_at(0); | 127 | 18 | state->hybrid_set->insert((void*)const_data.data, const_data.size); | 128 | 18 | } else { | 129 | 0 | state->use_set = false; | 130 | 0 | state->hybrid_set.reset(); | 131 | 0 | break; | 132 | 0 | } | 133 | 18 | } | 134 | 8 | return Status::OK(); | 135 | 64 | } |
|
136 | | |
137 | | Status evaluate_inverted_index( |
138 | | const ColumnsWithTypeAndName& arguments, |
139 | | const std::vector<IndexFieldNameAndTypePair>& data_type_with_names, |
140 | | std::vector<segment_v2::IndexIterator*> iterators, uint32_t num_rows, |
141 | | const InvertedIndexAnalyzerCtx* analyzer_ctx, |
142 | 0 | segment_v2::InvertedIndexResultBitmap& bitmap_result) const override { |
143 | 0 | DCHECK(data_type_with_names.size() == 1); |
144 | 0 | DCHECK(iterators.size() == 1); |
145 | 0 | auto* iter = iterators[0]; |
146 | 0 | auto data_type_with_name = data_type_with_names[0]; |
147 | 0 | std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>(); |
148 | 0 | std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>(); |
149 | |
|
150 | 0 | if (iter == nullptr) { |
151 | 0 | return Status::OK(); |
152 | 0 | } |
153 | 0 | if (!segment_v2::IndexReaderHelper::has_string_or_bkd_index(iter)) { |
154 | | //NOT support in list when parser is FULLTEXT for expr inverted index evaluate. |
155 | 0 | return Status::OK(); |
156 | 0 | } |
157 | 0 | if (iter->has_null()) { |
158 | 0 | segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle; |
159 | 0 | RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); |
160 | 0 | null_bitmap = null_bitmap_cache_handle.get_bitmap(); |
161 | 0 | } |
162 | 0 | for (const auto& arg : arguments) { |
163 | 0 | Field param_value; |
164 | 0 | arg.column->get(0, param_value); |
165 | 0 | if (param_value.is_null()) { |
166 | | // predicate like column NOT IN (NULL, '') should not push down to index. |
167 | 0 | if (negative) { |
168 | 0 | return Status::OK(); |
169 | 0 | } |
170 | 0 | *roaring |= *null_bitmap; |
171 | 0 | continue; |
172 | 0 | } |
173 | 0 | InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; |
174 | 0 | segment_v2::InvertedIndexParam param; |
175 | 0 | param.column_name = data_type_with_name.first; |
176 | 0 | param.column_type = data_type_with_name.second; |
177 | 0 | param.query_value = param_value; |
178 | 0 | param.query_type = query_type; |
179 | 0 | param.num_rows = num_rows; |
180 | 0 | param.roaring = std::make_shared<roaring::Roaring>(); |
181 | 0 | param.analyzer_ctx = analyzer_ctx; |
182 | 0 | RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {¶m})); |
183 | 0 | *roaring |= *param.roaring; |
184 | 0 | } |
185 | 0 | segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap); |
186 | 0 | bitmap_result = result; |
187 | 0 | bitmap_result.mask_out_null(); |
188 | 0 | if constexpr (negative) { |
189 | 0 | roaring::Roaring full_result; |
190 | 0 | full_result.addRange(0, num_rows); |
191 | 0 | bitmap_result.op_not(&full_result); |
192 | 0 | } |
193 | 0 | return Status::OK(); |
194 | 0 | } Unexecuted instantiation: _ZNK5doris10FunctionInILb0EE23evaluate_inverted_indexERKSt6vectorINS_21ColumnWithTypeAndNameESaIS3_EERKS2_ISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt10shared_ptrIKNS_9IDataTypeEEESaISJ_EES2_IPNS_10segment_v213IndexIteratorESaISQ_EEjPKNS_24InvertedIndexAnalyzerCtxERNSO_25InvertedIndexResultBitmapE Unexecuted instantiation: _ZNK5doris10FunctionInILb1EE23evaluate_inverted_indexERKSt6vectorINS_21ColumnWithTypeAndNameESaIS3_EERKS2_ISt4pairINSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt10shared_ptrIKNS_9IDataTypeEEESaISJ_EES2_IPNS_10segment_v213IndexIteratorESaISQ_EEjPKNS_24InvertedIndexAnalyzerCtxERNSO_25InvertedIndexResultBitmapE |
195 | | |
196 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
197 | 15.8k | uint32_t result, size_t input_rows_count) const override { |
198 | 15.8k | auto* in_state = reinterpret_cast<InState*>( |
199 | 15.8k | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
200 | 15.8k | if (!in_state) { |
201 | 0 | return Status::RuntimeError("funciton context for function '{}' must have Set;", |
202 | 0 | get_name()); |
203 | 0 | } |
204 | 15.8k | auto res = ColumnUInt8::create(); |
205 | 15.8k | ColumnUInt8::Container& vec_res = res->get_data(); |
206 | 15.8k | vec_res.resize(input_rows_count); |
207 | | |
208 | 15.8k | ColumnUInt8::MutablePtr col_null_map_to; |
209 | 15.8k | col_null_map_to = ColumnUInt8::create(input_rows_count, false); |
210 | 15.8k | auto& vec_null_map_to = col_null_map_to->get_data(); |
211 | | |
212 | 15.8k | const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]); |
213 | 15.8k | const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column); |
214 | | |
215 | 15.8k | if (in_state->use_set) { |
216 | 15.8k | if (materialized_column->is_nullable()) { |
217 | 15.8k | const auto* null_col_ptr = |
218 | 15.8k | assert_cast<const ColumnNullable*>(materialized_column.get()); |
219 | 15.8k | const auto& null_map = null_col_ptr->get_null_map_column().get_data(); |
220 | 15.8k | const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get(); |
221 | | |
222 | 15.8k | if (nested_col_ptr->is_column_string()) { |
223 | 826 | const auto* column_string_ptr = |
224 | 826 | assert_cast<const ColumnString*>(nested_col_ptr); |
225 | 826 | search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map, |
226 | 826 | column_string_ptr); |
227 | 15.0k | } else { |
228 | | //TODO: support other column type |
229 | 15.0k | search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map, |
230 | 15.0k | nested_col_ptr); |
231 | 15.0k | } |
232 | | |
233 | 15.8k | if (!in_state->hybrid_set->contain_null()) { |
234 | 3.03M | for (size_t i = 0; i < input_rows_count; ++i) { |
235 | 3.02M | vec_null_map_to[i] = null_map[i]; |
236 | 3.02M | } |
237 | 15.5k | } else { |
238 | 21.1k | for (size_t i = 0; i < input_rows_count; ++i) { |
239 | 20.8k | vec_null_map_to[i] = null_map[i] || negative == vec_res[i]; |
240 | 20.8k | } |
241 | 304 | } |
242 | | |
243 | 15.8k | } else { // non-nullable |
244 | 4 | if (is_string_type(left_arg.type->get_primitive_type())) { |
245 | 4 | const auto* column_string_ptr = |
246 | 4 | assert_cast<const ColumnString*>(materialized_column.get()); |
247 | 4 | search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr); |
248 | 4 | } else { |
249 | 0 | search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get()); |
250 | 0 | } |
251 | | |
252 | 4 | if (in_state->hybrid_set->contain_null()) { |
253 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
254 | 0 | vec_null_map_to[i] = negative == vec_res[i]; |
255 | 0 | } |
256 | 0 | } |
257 | 4 | } |
258 | 15.8k | } else { //!in_state->use_set |
259 | 0 | std::vector<ColumnPtr> set_columns; |
260 | 0 | for (int i = 1; i < arguments.size(); ++i) { |
261 | 0 | set_columns.emplace_back(block.get_by_position(arguments[i]).column); |
262 | 0 | } |
263 | 0 | if (col_const) { |
264 | 0 | impl_without_set<true>(context, set_columns, input_rows_count, vec_res, |
265 | 0 | vec_null_map_to, materialized_column); |
266 | 0 | } else { |
267 | 0 | impl_without_set<false>(context, set_columns, input_rows_count, vec_res, |
268 | 0 | vec_null_map_to, materialized_column); |
269 | 0 | } |
270 | 0 | } |
271 | | |
272 | 15.8k | if (block.get_by_position(result).type->is_nullable()) { |
273 | 15.8k | block.replace_by_position( |
274 | 15.8k | result, ColumnNullable::create(std::move(res), std::move(col_null_map_to))); |
275 | 15.8k | } else { |
276 | 4 | block.replace_by_position(result, std::move(res)); |
277 | 4 | } |
278 | | |
279 | 15.8k | return Status::OK(); |
280 | 15.8k | } _ZNK5doris10FunctionInILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 197 | 15.8k | uint32_t result, size_t input_rows_count) const override { | 198 | 15.8k | auto* in_state = reinterpret_cast<InState*>( | 199 | 15.8k | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 200 | 15.8k | if (!in_state) { | 201 | 0 | return Status::RuntimeError("funciton context for function '{}' must have Set;", | 202 | 0 | get_name()); | 203 | 0 | } | 204 | 15.8k | auto res = ColumnUInt8::create(); | 205 | 15.8k | ColumnUInt8::Container& vec_res = res->get_data(); | 206 | 15.8k | vec_res.resize(input_rows_count); | 207 | | | 208 | 15.8k | ColumnUInt8::MutablePtr col_null_map_to; | 209 | 15.8k | col_null_map_to = ColumnUInt8::create(input_rows_count, false); | 210 | 15.8k | auto& vec_null_map_to = col_null_map_to->get_data(); | 211 | | | 212 | 15.8k | const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]); | 213 | 15.8k | const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column); | 214 | | | 215 | 15.8k | if (in_state->use_set) { | 216 | 15.8k | if (materialized_column->is_nullable()) { | 217 | 15.8k | const auto* null_col_ptr = | 218 | 15.8k | assert_cast<const ColumnNullable*>(materialized_column.get()); | 219 | 15.8k | const auto& null_map = null_col_ptr->get_null_map_column().get_data(); | 220 | 15.8k | const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get(); | 221 | | | 222 | 15.8k | if (nested_col_ptr->is_column_string()) { | 223 | 818 | const auto* column_string_ptr = | 224 | 818 | assert_cast<const ColumnString*>(nested_col_ptr); | 225 | 818 | search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map, | 226 | 818 | column_string_ptr); | 227 | 15.0k | } else { | 228 | | //TODO: support other column type | 229 | 15.0k | search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map, | 230 | 15.0k | nested_col_ptr); 
| 231 | 15.0k | } | 232 | | | 233 | 15.8k | if (!in_state->hybrid_set->contain_null()) { | 234 | 3.01M | for (size_t i = 0; i < input_rows_count; ++i) { | 235 | 3.00M | vec_null_map_to[i] = null_map[i]; | 236 | 3.00M | } | 237 | 15.5k | } else { | 238 | 21.1k | for (size_t i = 0; i < input_rows_count; ++i) { | 239 | 20.8k | vec_null_map_to[i] = null_map[i] || negative == vec_res[i]; | 240 | 20.8k | } | 241 | 304 | } | 242 | | | 243 | 15.8k | } else { // non-nullable | 244 | 4 | if (is_string_type(left_arg.type->get_primitive_type())) { | 245 | 4 | const auto* column_string_ptr = | 246 | 4 | assert_cast<const ColumnString*>(materialized_column.get()); | 247 | 4 | search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr); | 248 | 4 | } else { | 249 | 0 | search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get()); | 250 | 0 | } | 251 | | | 252 | 4 | if (in_state->hybrid_set->contain_null()) { | 253 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 254 | 0 | vec_null_map_to[i] = negative == vec_res[i]; | 255 | 0 | } | 256 | 0 | } | 257 | 4 | } | 258 | 15.8k | } else { //!in_state->use_set | 259 | 0 | std::vector<ColumnPtr> set_columns; | 260 | 0 | for (int i = 1; i < arguments.size(); ++i) { | 261 | 0 | set_columns.emplace_back(block.get_by_position(arguments[i]).column); | 262 | 0 | } | 263 | 0 | if (col_const) { | 264 | 0 | impl_without_set<true>(context, set_columns, input_rows_count, vec_res, | 265 | 0 | vec_null_map_to, materialized_column); | 266 | 0 | } else { | 267 | 0 | impl_without_set<false>(context, set_columns, input_rows_count, vec_res, | 268 | 0 | vec_null_map_to, materialized_column); | 269 | 0 | } | 270 | 0 | } | 271 | | | 272 | 15.8k | if (block.get_by_position(result).type->is_nullable()) { | 273 | 15.8k | block.replace_by_position( | 274 | 15.8k | result, ColumnNullable::create(std::move(res), std::move(col_null_map_to))); | 275 | 15.8k | } else { | 276 | 4 | block.replace_by_position(result, std::move(res)); 
| 277 | 4 | } | 278 | | | 279 | 15.8k | return Status::OK(); | 280 | 15.8k | } |
_ZNK5doris10FunctionInILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 197 | 18 | uint32_t result, size_t input_rows_count) const override { | 198 | 18 | auto* in_state = reinterpret_cast<InState*>( | 199 | 18 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 200 | 18 | if (!in_state) { | 201 | 0 | return Status::RuntimeError("funciton context for function '{}' must have Set;", | 202 | 0 | get_name()); | 203 | 0 | } | 204 | 18 | auto res = ColumnUInt8::create(); | 205 | 18 | ColumnUInt8::Container& vec_res = res->get_data(); | 206 | 18 | vec_res.resize(input_rows_count); | 207 | | | 208 | 18 | ColumnUInt8::MutablePtr col_null_map_to; | 209 | 18 | col_null_map_to = ColumnUInt8::create(input_rows_count, false); | 210 | 18 | auto& vec_null_map_to = col_null_map_to->get_data(); | 211 | | | 212 | 18 | const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]); | 213 | 18 | const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column); | 214 | | | 215 | 18 | if (in_state->use_set) { | 216 | 18 | if (materialized_column->is_nullable()) { | 217 | 18 | const auto* null_col_ptr = | 218 | 18 | assert_cast<const ColumnNullable*>(materialized_column.get()); | 219 | 18 | const auto& null_map = null_col_ptr->get_null_map_column().get_data(); | 220 | 18 | const auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get(); | 221 | | | 222 | 18 | if (nested_col_ptr->is_column_string()) { | 223 | 8 | const auto* column_string_ptr = | 224 | 8 | assert_cast<const ColumnString*>(nested_col_ptr); | 225 | 8 | search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map, | 226 | 8 | column_string_ptr); | 227 | 10 | } else { | 228 | | //TODO: support other column type | 229 | 10 | search_hash_set_check_null(in_state, input_rows_count, vec_res, null_map, | 230 | 10 | nested_col_ptr); | 231 | 10 | } | 232 | | | 233 | 18 | if (!in_state->hybrid_set->contain_null()) { | 234 | 
16.9k | for (size_t i = 0; i < input_rows_count; ++i) { | 235 | 16.8k | vec_null_map_to[i] = null_map[i]; | 236 | 16.8k | } | 237 | 18 | } else { | 238 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 239 | 0 | vec_null_map_to[i] = null_map[i] || negative == vec_res[i]; | 240 | 0 | } | 241 | 0 | } | 242 | | | 243 | 18 | } else { // non-nullable | 244 | 0 | if (is_string_type(left_arg.type->get_primitive_type())) { | 245 | 0 | const auto* column_string_ptr = | 246 | 0 | assert_cast<const ColumnString*>(materialized_column.get()); | 247 | 0 | search_hash_set(in_state, input_rows_count, vec_res, column_string_ptr); | 248 | 0 | } else { | 249 | 0 | search_hash_set(in_state, input_rows_count, vec_res, materialized_column.get()); | 250 | 0 | } | 251 | |
| 252 | 0 | if (in_state->hybrid_set->contain_null()) { | 253 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { | 254 | 0 | vec_null_map_to[i] = negative == vec_res[i]; | 255 | 0 | } | 256 | 0 | } | 257 | 0 | } | 258 | 18 | } else { //!in_state->use_set | 259 | 0 | std::vector<ColumnPtr> set_columns; | 260 | 0 | for (int i = 1; i < arguments.size(); ++i) { | 261 | 0 | set_columns.emplace_back(block.get_by_position(arguments[i]).column); | 262 | 0 | } | 263 | 0 | if (col_const) { | 264 | 0 | impl_without_set<true>(context, set_columns, input_rows_count, vec_res, | 265 | 0 | vec_null_map_to, materialized_column); | 266 | 0 | } else { | 267 | 0 | impl_without_set<false>(context, set_columns, input_rows_count, vec_res, | 268 | 0 | vec_null_map_to, materialized_column); | 269 | 0 | } | 270 | 0 | } | 271 | | | 272 | 18 | if (block.get_by_position(result).type->is_nullable()) { | 273 | 18 | block.replace_by_position( | 274 | 18 | result, ColumnNullable::create(std::move(res), std::move(col_null_map_to))); | 275 | 18 | } else { | 276 | 0 | block.replace_by_position(result, std::move(res)); | 277 | 0 | } | 278 | | | 279 | 18 | return Status::OK(); | 280 | 18 | } |
|
281 | | |
282 | | private: |
283 | | template <typename T> |
284 | | static void search_hash_set_check_null(InState* in_state, size_t input_rows_count, |
285 | | ColumnUInt8::Container& vec_res, |
286 | 15.8k | const ColumnUInt8::Container& null_map, T* col_ptr) { |
287 | 15.8k | if constexpr (!negative) { |
288 | 15.8k | in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map, |
289 | 15.8k | vec_res); |
290 | 15.8k | } else { |
291 | 18 | in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map, |
292 | 18 | vec_res); |
293 | 18 | } |
294 | 15.8k | } _ZN5doris10FunctionInILb0EE26search_hash_set_check_nullIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSC_PT_ Line | Count | Source | 286 | 818 | const ColumnUInt8::Container& null_map, T* col_ptr) { | 287 | 818 | if constexpr (!negative) { | 288 | 818 | in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map, | 289 | 818 | vec_res); | 290 | | } else { | 291 | | in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map, | 292 | | vec_res); | 293 | | } | 294 | 818 | } |
_ZN5doris10FunctionInILb0EE26search_hash_set_check_nullIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSB_PT_ Line | Count | Source | 286 | 15.0k | const ColumnUInt8::Container& null_map, T* col_ptr) { | 287 | 15.0k | if constexpr (!negative) { | 288 | 15.0k | in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map, | 289 | 15.0k | vec_res); | 290 | | } else { | 291 | | in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map, | 292 | | vec_res); | 293 | | } | 294 | 15.0k | } |
_ZN5doris10FunctionInILb1EE26search_hash_set_check_nullIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSC_PT_ Line | Count | Source | 286 | 8 | const ColumnUInt8::Container& null_map, T* col_ptr) { | 287 | | if constexpr (!negative) { | 288 | | in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map, | 289 | | vec_res); | 290 | 8 | } else { | 291 | 8 | in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map, | 292 | 8 | vec_res); | 293 | 8 | } | 294 | 8 | } |
_ZN5doris10FunctionInILb1EE26search_hash_set_check_nullIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKSB_PT_ Line | Count | Source | 286 | 10 | const ColumnUInt8::Container& null_map, T* col_ptr) { | 287 | | if constexpr (!negative) { | 288 | | in_state->hybrid_set->find_batch_nullable(*col_ptr, input_rows_count, null_map, | 289 | | vec_res); | 290 | 10 | } else { | 291 | 10 | in_state->hybrid_set->find_batch_nullable_negative(*col_ptr, input_rows_count, null_map, | 292 | 10 | vec_res); | 293 | 10 | } | 294 | 10 | } |
|
295 | | |
// Batch membership probe over a whole input column, without any null map.
//
// Parameters:
//   in_state          - state object whose `hybrid_set` member is probed.
//   input_rows_count  - row count forwarded unchanged to the set's batch call.
//   vec_res           - output container forwarded to the set's batch call.
//   col_ptr           - input column; dereferenced and passed by reference.
//
// Unlike search_hash_set_check_null above, no null_map argument is passed to
// the set here.
// NOTE(review): find_batch / find_batch_negative are not visible in this
// chunk; presumably they write one 0/1 result per row into vec_res - confirm.
296 | | template <typename T> |
297 | | static void search_hash_set(InState* in_state, size_t input_rows_count, |
298 | 4 | ColumnUInt8::Container& vec_res, T* col_ptr) { |
// `negative` is not declared in this block; the mangled names below
// (FunctionIn<false> / FunctionIn<true>) show it is the enclosing class's
// bool template parameter, so only one branch is compiled per instantiation.
299 | 4 | if constexpr (!negative) { |
300 | 4 | in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res); |
301 | 4 | } else { |
302 | 0 | in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res); |
303 | 0 | } |
304 | 4 | } _ZN5doris10FunctionInILb0EE15search_hash_setIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_ Line | Count | Source | 298 | 4 | ColumnUInt8::Container& vec_res, T* col_ptr) { | 299 | 4 | if constexpr (!negative) { | 300 | 4 | in_state->hybrid_set->find_batch(*col_ptr, input_rows_count, vec_res); | 301 | | } else { | 302 | | in_state->hybrid_set->find_batch_negative(*col_ptr, input_rows_count, vec_res); | 303 | | } | 304 | 4 | } |
Unexecuted instantiation: _ZN5doris10FunctionInILb0EE15search_hash_setIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_ Unexecuted instantiation: _ZN5doris10FunctionInILb1EE15search_hash_setIKNS_9ColumnStrIjEEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_ Unexecuted instantiation: _ZN5doris10FunctionInILb1EE15search_hash_setIKNS_7IColumnEEEvPNS_7InStateEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEPT_ |
305 | | |
// Row-by-row IN / NOT IN evaluation that builds a new set for every row from
// the values found at that row in `set_columns`.
//
// Template parameter:
//   Const - forwarded to index_check_const(i, Const) when reading
//           materialized_column.
//           NOTE(review): presumably maps every row to index 0 when the
//           left-hand column is constant - confirm against index_check_const.
//
// Parameters:
//   context             - only used for the primitive type of argument 0,
//                         which selects the set implementation.
//   set_columns         - one column per IN-list element; read at row i.
//   input_rows_count    - number of rows to process.
//   vec_res             - per-row result; not written for rows whose
//                         left-hand value is NULL.
//   vec_null_map_to     - per-row NULL flag of the result; written for every row.
//   materialized_column - left-hand side values.
306 | | template <bool Const> |
307 | | static void impl_without_set(FunctionContext* context, |
308 | | const std::vector<ColumnPtr>& set_columns, size_t input_rows_count, |
309 | | ColumnUInt8::Container& vec_res, |
310 | | ColumnUInt8::Container& vec_null_map_to, |
311 | 0 | const ColumnPtr& materialized_column) { |
312 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
313 | 0 | const auto& ref_data = materialized_column->get_data_at(index_check_const(i, Const)); |
// A null data pointer is treated as a NULL left-hand value: the row is marked
// NULL and skipped, leaving vec_res[i] untouched.
314 | 0 | if (ref_data.data == nullptr) { |
315 | 0 | vec_null_map_to[i] = true; |
316 | 0 | continue; |
317 | 0 | } |
318 | | |
319 | 0 | std::vector<StringRef> set_datas; |
320 | | // To comply with the SQL standard, IN() returns NULL not only if the expression on the left hand side is NULL, |
321 | | // but also if no match is found in the list and one of the expressions in the list is NULL. |
322 | 0 | bool null_in_set = false; |
323 |
|
// Gather this row's non-NULL list values; NULL elements are only remembered
// through null_in_set. The set columns are indexed with i directly, not
// through index_check_const.
324 | 0 | for (const auto& set_column : set_columns) { |
325 | 0 | auto set_data = set_column->get_data_at(i); |
326 | 0 | if (set_data.data == nullptr) { |
327 | 0 | null_in_set = true; |
328 | 0 | } else { |
329 | 0 | set_datas.push_back(set_data); |
330 | 0 | } |
331 | 0 | } |
// The set is constructed inside the row loop, so it is rebuilt for every row.
// NOTE(review): the meaning of create_set's third argument (`true`) is not
// visible in this chunk - confirm.
332 | 0 | std::unique_ptr<HybridSetBase> hybrid_set(create_set( |
333 | 0 | context->get_arg_type(0)->get_primitive_type(), set_datas.size(), true)); |
334 | 0 | for (auto& set_data : set_datas) { |
335 | 0 | hybrid_set->insert((void*)(set_data.data), set_data.size); |
336 | 0 | } |
337 |
|
// XOR with `negative` flips the lookup result for the NOT IN instantiation.
338 | 0 | vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size); |
// Since vec_res[i] == negative ^ found, `negative == vec_res[i]` holds exactly
// when no match was found; with a NULL in the list such a row becomes NULL.
339 | 0 | if (null_in_set) { |
340 | 0 | vec_null_map_to[i] = negative == vec_res[i]; |
341 | 0 | } else { |
342 | 0 | vec_null_map_to[i] = false; |
343 | 0 | } |
344 | 0 | } |
345 | 0 | } Unexecuted instantiation: _ZN5doris10FunctionInILb0EE16impl_without_setILb1EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_ Unexecuted instantiation: _ZN5doris10FunctionInILb0EE16impl_without_setILb0EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_ Unexecuted instantiation: _ZN5doris10FunctionInILb1EE16impl_without_setILb1EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_ Unexecuted instantiation: _ZN5doris10FunctionInILb1EE16impl_without_setILb0EEEvPNS_15FunctionContextERKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS7_EESaISA_EEmRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESK_RKSA_ |
346 | | }; |
347 | | |
348 | | } // namespace doris |