be/src/exprs/function/function_collection_in.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | |
19 | | #pragma once |
20 | | |
21 | | #include <glog/logging.h> |
22 | | #include <stddef.h> |
23 | | |
24 | | #include <algorithm> |
25 | | #include <memory> |
26 | | #include <unordered_set> |
27 | | #include <utility> |
28 | | #include <vector> |
29 | | |
30 | | #include "common/status.h" |
31 | | #include "core/block/block.h" |
32 | | #include "core/column/column.h" |
33 | | #include "core/column/column_const.h" |
34 | | #include "core/column/column_nullable.h" |
35 | | #include "core/column/column_struct.h" |
36 | | #include "core/column/column_vector.h" |
37 | | #include "core/data_type/data_type_factory.hpp" |
38 | | #include "core/data_type/data_type_nullable.h" |
39 | | #include "core/data_type/data_type_number.h" |
40 | | #include "exprs/function/function.h" |
41 | | |
42 | | namespace doris { |
43 | | #include "common/compile_check_begin.h" |
44 | | struct ColumnRowRef { |
45 | | ENABLE_FACTORY_CREATOR(ColumnRowRef); |
46 | | ColumnPtr column; |
47 | | size_t row_idx; |
48 | | |
49 | | // equals when call set insert, this operator will be used |
50 | 6 | bool operator==(const ColumnRowRef& other) const { |
51 | 6 | return column->compare_at(row_idx, other.row_idx, *other.column, 0) == 0; |
52 | 6 | } |
53 | | // compare |
54 | 0 | bool operator<(const ColumnRowRef& other) const { |
55 | 0 | return column->compare_at(row_idx, other.row_idx, *other.column, 0) < 0; |
56 | 0 | } |
57 | | |
58 | | // when call set find, will use hash to find |
59 | 43 | size_t operator()(const ColumnRowRef& a) const { |
60 | 43 | uint32_t hash_val = 0; |
61 | 43 | a.column->update_crc_with_value(a.row_idx, a.row_idx + 1, hash_val, nullptr); |
62 | 43 | return hash_val; |
63 | 43 | } |
64 | | }; |
65 | | |
66 | | struct CollectionInState { |
67 | | ENABLE_FACTORY_CREATOR(CollectionInState) |
68 | | std::unordered_set<ColumnRowRef, ColumnRowRef> args_set; |
69 | | bool null_in_set = false; |
70 | | }; |
71 | | |
72 | | template <bool negative> |
73 | | class FunctionCollectionIn : public IFunction { |
74 | | public: |
75 | | static constexpr auto name = negative ? "collection_not_in" : "collection_in"; |
76 | | |
77 | 30 | static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); }_ZN5doris20FunctionCollectionInILb0EE6createEv Line | Count | Source | 77 | 18 | static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); } |
_ZN5doris20FunctionCollectionInILb1EE6createEv Line | Count | Source | 77 | 12 | static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); } |
|
78 | | |
79 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb1EE8get_nameB5cxx11Ev |
80 | | |
81 | 14 | bool is_variadic() const override { return true; }_ZNK5doris20FunctionCollectionInILb0EE11is_variadicEv Line | Count | Source | 81 | 10 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionCollectionInILb1EE11is_variadicEv Line | Count | Source | 81 | 4 | bool is_variadic() const override { return true; } |
|
82 | | |
83 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb1EE23get_number_of_argumentsEv |
84 | | |
85 | 12 | DataTypePtr get_return_type_impl(const DataTypes& args) const override { |
86 | 26 | for (const auto& arg : args) { |
87 | 26 | if (arg->is_nullable()) { |
88 | 8 | return make_nullable(std::make_shared<DataTypeUInt8>()); |
89 | 8 | } |
90 | 26 | } |
91 | 4 | return std::make_shared<DataTypeUInt8>(); |
92 | 12 | } _ZNK5doris20FunctionCollectionInILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 85 | 9 | DataTypePtr get_return_type_impl(const DataTypes& args) const override { | 86 | 16 | for (const auto& arg : args) { | 87 | 16 | if (arg->is_nullable()) { | 88 | 7 | return make_nullable(std::make_shared<DataTypeUInt8>()); | 89 | 7 | } | 90 | 16 | } | 91 | 2 | return std::make_shared<DataTypeUInt8>(); | 92 | 9 | } |
_ZNK5doris20FunctionCollectionInILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Line | Count | Source | 85 | 3 | DataTypePtr get_return_type_impl(const DataTypes& args) const override { | 86 | 10 | for (const auto& arg : args) { | 87 | 10 | if (arg->is_nullable()) { | 88 | 1 | return make_nullable(std::make_shared<DataTypeUInt8>()); | 89 | 1 | } | 90 | 10 | } | 91 | 2 | return std::make_shared<DataTypeUInt8>(); | 92 | 3 | } |
|
93 | | |
94 | 30 | bool use_default_implementation_for_nulls() const override { return false; }_ZNK5doris20FunctionCollectionInILb0EE36use_default_implementation_for_nullsEv Line | Count | Source | 94 | 24 | bool use_default_implementation_for_nulls() const override { return false; } |
_ZNK5doris20FunctionCollectionInILb1EE36use_default_implementation_for_nullsEv Line | Count | Source | 94 | 6 | bool use_default_implementation_for_nulls() const override { return false; } |
|
95 | | |
96 | | // make data in context into a set |
97 | 96 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
98 | 96 | if (scope == FunctionContext::THREAD_LOCAL) { |
99 | 84 | return Status::OK(); |
100 | 84 | } |
101 | 12 | int num_args = context->get_num_args(); |
102 | 12 | DCHECK(num_args >= 1); |
103 | | |
104 | 12 | std::shared_ptr<CollectionInState> state = std::make_shared<CollectionInState>(); |
105 | 12 | context->set_function_state(scope, state); |
106 | | |
107 | 12 | DataTypePtr args_type = remove_nullable(context->get_arg_type(0)); |
108 | 12 | MutableColumnPtr args_column_ptr = args_type->create_column(); |
109 | | |
110 | 42 | for (int i = 1; i < num_args; i++) { |
111 | | // FE should make element type consistent and |
112 | | // equalize the length of the elements in struct |
113 | 30 | const auto& const_column_ptr = context->get_constant_col(i); |
114 | | // Types like struct, array, and map only support constant expressions. |
115 | 30 | DCHECK(const_column_ptr != nullptr); |
116 | 30 | const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr); |
117 | 30 | if (col->is_nullable()) { |
118 | 5 | const auto* null_col = check_and_get_column<ColumnNullable>(col.get()); |
119 | 5 | if (null_col->has_null()) { |
120 | 5 | state->null_in_set = true; |
121 | 5 | } else { |
122 | 0 | args_column_ptr->insert_from(null_col->get_nested_column(), 0); |
123 | 0 | } |
124 | 25 | } else { |
125 | 25 | args_column_ptr->insert_from(*col, 0); |
126 | 25 | } |
127 | 30 | } |
128 | 12 | ColumnPtr column_ptr = std::move(args_column_ptr); |
129 | | // make collection ref into set |
130 | 12 | auto col_size = column_ptr->size(); |
131 | 37 | for (size_t i = 0; i < col_size; i++) { |
132 | 25 | state->args_set.insert({column_ptr, i}); |
133 | 25 | } |
134 | | |
135 | 12 | return Status::OK(); |
136 | 96 | } _ZN5doris20FunctionCollectionInILb0EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE Line | Count | Source | 97 | 87 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 98 | 87 | if (scope == FunctionContext::THREAD_LOCAL) { | 99 | 78 | return Status::OK(); | 100 | 78 | } | 101 | 9 | int num_args = context->get_num_args(); | 102 | 9 | DCHECK(num_args >= 1); | 103 | | | 104 | 9 | std::shared_ptr<CollectionInState> state = std::make_shared<CollectionInState>(); | 105 | 9 | context->set_function_state(scope, state); | 106 | | | 107 | 9 | DataTypePtr args_type = remove_nullable(context->get_arg_type(0)); | 108 | 9 | MutableColumnPtr args_column_ptr = args_type->create_column(); | 109 | | | 110 | 32 | for (int i = 1; i < num_args; i++) { | 111 | | // FE should make element type consistent and | 112 | | // equalize the length of the elements in struct | 113 | 23 | const auto& const_column_ptr = context->get_constant_col(i); | 114 | | // Types like struct, array, and map only support constant expressions. | 115 | 23 | DCHECK(const_column_ptr != nullptr); | 116 | 23 | const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr); | 117 | 23 | if (col->is_nullable()) { | 118 | 4 | const auto* null_col = check_and_get_column<ColumnNullable>(col.get()); | 119 | 4 | if (null_col->has_null()) { | 120 | 4 | state->null_in_set = true; | 121 | 4 | } else { | 122 | 0 | args_column_ptr->insert_from(null_col->get_nested_column(), 0); | 123 | 0 | } | 124 | 19 | } else { | 125 | 19 | args_column_ptr->insert_from(*col, 0); | 126 | 19 | } | 127 | 23 | } | 128 | 9 | ColumnPtr column_ptr = std::move(args_column_ptr); | 129 | | // make collection ref into set | 130 | 9 | auto col_size = column_ptr->size(); | 131 | 28 | for (size_t i = 0; i < col_size; i++) { | 132 | 19 | state->args_set.insert({column_ptr, i}); | 133 | 19 | } | 134 | | | 135 | 9 | return Status::OK(); | 136 | 87 | } |
_ZN5doris20FunctionCollectionInILb1EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE Line | Count | Source | 97 | 9 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { | 98 | 9 | if (scope == FunctionContext::THREAD_LOCAL) { | 99 | 6 | return Status::OK(); | 100 | 6 | } | 101 | 3 | int num_args = context->get_num_args(); | 102 | 3 | DCHECK(num_args >= 1); | 103 | | | 104 | 3 | std::shared_ptr<CollectionInState> state = std::make_shared<CollectionInState>(); | 105 | 3 | context->set_function_state(scope, state); | 106 | | | 107 | 3 | DataTypePtr args_type = remove_nullable(context->get_arg_type(0)); | 108 | 3 | MutableColumnPtr args_column_ptr = args_type->create_column(); | 109 | | | 110 | 10 | for (int i = 1; i < num_args; i++) { | 111 | | // FE should make element type consistent and | 112 | | // equalize the length of the elements in struct | 113 | 7 | const auto& const_column_ptr = context->get_constant_col(i); | 114 | | // Types like struct, array, and map only support constant expressions. | 115 | 7 | DCHECK(const_column_ptr != nullptr); | 116 | 7 | const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr); | 117 | 7 | if (col->is_nullable()) { | 118 | 1 | const auto* null_col = check_and_get_column<ColumnNullable>(col.get()); | 119 | 1 | if (null_col->has_null()) { | 120 | 1 | state->null_in_set = true; | 121 | 1 | } else { | 122 | 0 | args_column_ptr->insert_from(null_col->get_nested_column(), 0); | 123 | 0 | } | 124 | 6 | } else { | 125 | 6 | args_column_ptr->insert_from(*col, 0); | 126 | 6 | } | 127 | 7 | } | 128 | 3 | ColumnPtr column_ptr = std::move(args_column_ptr); | 129 | | // make collection ref into set | 130 | 3 | auto col_size = column_ptr->size(); | 131 | 9 | for (size_t i = 0; i < col_size; i++) { | 132 | 6 | state->args_set.insert({column_ptr, i}); | 133 | 6 | } | 134 | | | 135 | 3 | return Status::OK(); | 136 | 9 | } |
|
137 | | |
138 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
139 | 18 | uint32_t result, size_t input_rows_count) const override { |
140 | 18 | auto in_state = reinterpret_cast<CollectionInState*>( |
141 | 18 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
142 | 18 | if (!in_state) { |
143 | 0 | return Status::RuntimeError("function context for function '{}' must have Set;", |
144 | 0 | get_name()); |
145 | 0 | } |
146 | 18 | const auto& args_set = in_state->args_set; |
147 | 18 | const bool null_in_set = in_state->null_in_set; |
148 | 18 | auto res = ColumnUInt8::create(); |
149 | 18 | ColumnUInt8::Container& vec_res = res->get_data(); |
150 | 18 | vec_res.resize(input_rows_count); |
151 | | |
152 | 18 | ColumnUInt8::MutablePtr col_null_map_to; |
153 | 18 | col_null_map_to = ColumnUInt8::create(input_rows_count, false); |
154 | 18 | auto& vec_null_map_to = col_null_map_to->get_data(); |
155 | | |
156 | 18 | const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]); |
157 | 18 | const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column); |
158 | 18 | auto materialized_column_not_null = materialized_column; |
159 | 18 | if (materialized_column_not_null->is_nullable()) { |
160 | 12 | materialized_column_not_null = assert_cast<ColumnPtr>( |
161 | 12 | check_and_get_column<ColumnNullable>(materialized_column_not_null.get()) |
162 | 12 | ->get_nested_column_ptr()); |
163 | 12 | } |
164 | | |
165 | 36 | for (size_t i = 0; i < input_rows_count; ++i) { |
166 | 18 | bool find = args_set.find({materialized_column_not_null, i}) != args_set.end(); |
167 | | |
168 | 18 | if constexpr (negative) { |
169 | 3 | vec_res[i] = !find; |
170 | 15 | } else { |
171 | 15 | vec_res[i] = find; |
172 | 15 | } |
173 | | |
174 | 18 | if (null_in_set) { |
175 | 8 | vec_null_map_to[i] = negative == vec_res[i]; |
176 | 10 | } else { |
177 | 10 | vec_null_map_to[i] = false; |
178 | 10 | } |
179 | 18 | } |
180 | | |
181 | 18 | if (block.get_by_position(result).type->is_nullable()) { |
182 | 14 | block.replace_by_position( |
183 | 14 | result, ColumnNullable::create(std::move(res), std::move(col_null_map_to))); |
184 | 14 | } else { |
185 | 4 | block.replace_by_position(result, std::move(res)); |
186 | 4 | } |
187 | 18 | return Status::OK(); |
188 | 18 | } _ZNK5doris20FunctionCollectionInILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 139 | 15 | uint32_t result, size_t input_rows_count) const override { | 140 | 15 | auto in_state = reinterpret_cast<CollectionInState*>( | 141 | 15 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 142 | 15 | if (!in_state) { | 143 | 0 | return Status::RuntimeError("function context for function '{}' must have Set;", | 144 | 0 | get_name()); | 145 | 0 | } | 146 | 15 | const auto& args_set = in_state->args_set; | 147 | 15 | const bool null_in_set = in_state->null_in_set; | 148 | 15 | auto res = ColumnUInt8::create(); | 149 | 15 | ColumnUInt8::Container& vec_res = res->get_data(); | 150 | 15 | vec_res.resize(input_rows_count); | 151 | | | 152 | 15 | ColumnUInt8::MutablePtr col_null_map_to; | 153 | 15 | col_null_map_to = ColumnUInt8::create(input_rows_count, false); | 154 | 15 | auto& vec_null_map_to = col_null_map_to->get_data(); | 155 | | | 156 | 15 | const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]); | 157 | 15 | const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column); | 158 | 15 | auto materialized_column_not_null = materialized_column; | 159 | 15 | if (materialized_column_not_null->is_nullable()) { | 160 | 12 | materialized_column_not_null = assert_cast<ColumnPtr>( | 161 | 12 | check_and_get_column<ColumnNullable>(materialized_column_not_null.get()) | 162 | 12 | ->get_nested_column_ptr()); | 163 | 12 | } | 164 | | | 165 | 30 | for (size_t i = 0; i < input_rows_count; ++i) { | 166 | 15 | bool find = args_set.find({materialized_column_not_null, i}) != args_set.end(); | 167 | | | 168 | | if constexpr (negative) { | 169 | | vec_res[i] = !find; | 170 | 15 | } else { | 171 | 15 | vec_res[i] = find; | 172 | 15 | } | 173 | | | 174 | 15 | if (null_in_set) { | 175 | 7 | vec_null_map_to[i] = negative == vec_res[i]; | 176 | 8 | } else { | 177 | 8 | vec_null_map_to[i] = false; | 178 | 8 | } | 179 | 15 | } | 180 | | | 181 | 15 | if (block.get_by_position(result).type->is_nullable()) { | 182 | 13 | block.replace_by_position( | 183 | 13 | result, ColumnNullable::create(std::move(res), std::move(col_null_map_to))); | 184 | 13 | } else { | 185 | 2 | block.replace_by_position(result, std::move(res)); | 186 | 2 | } | 187 | 15 | return Status::OK(); | 188 | 15 | } |
_ZNK5doris20FunctionCollectionInILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 139 | 3 | uint32_t result, size_t input_rows_count) const override { | 140 | 3 | auto in_state = reinterpret_cast<CollectionInState*>( | 141 | 3 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); | 142 | 3 | if (!in_state) { | 143 | 0 | return Status::RuntimeError("function context for function '{}' must have Set;", | 144 | 0 | get_name()); | 145 | 0 | } | 146 | 3 | const auto& args_set = in_state->args_set; | 147 | 3 | const bool null_in_set = in_state->null_in_set; | 148 | 3 | auto res = ColumnUInt8::create(); | 149 | 3 | ColumnUInt8::Container& vec_res = res->get_data(); | 150 | 3 | vec_res.resize(input_rows_count); | 151 | | | 152 | 3 | ColumnUInt8::MutablePtr col_null_map_to; | 153 | 3 | col_null_map_to = ColumnUInt8::create(input_rows_count, false); | 154 | 3 | auto& vec_null_map_to = col_null_map_to->get_data(); | 155 | | | 156 | 3 | const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]); | 157 | 3 | const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column); | 158 | 3 | auto materialized_column_not_null = materialized_column; | 159 | 3 | if (materialized_column_not_null->is_nullable()) { | 160 | 0 | materialized_column_not_null = assert_cast<ColumnPtr>( | 161 | 0 | check_and_get_column<ColumnNullable>(materialized_column_not_null.get()) | 162 | 0 | ->get_nested_column_ptr()); | 163 | 0 | } | 164 | | | 165 | 6 | for (size_t i = 0; i < input_rows_count; ++i) { | 166 | 3 | bool find = args_set.find({materialized_column_not_null, i}) != args_set.end(); | 167 | | | 168 | 3 | if constexpr (negative) { | 169 | 3 | vec_res[i] = !find; | 170 | | } else { | 171 | | vec_res[i] = find; | 172 | | } | 173 | | | 174 | 3 | if (null_in_set) { | 175 | 1 | vec_null_map_to[i] = negative == vec_res[i]; | 176 | 2 | } else { | 177 | 2 | vec_null_map_to[i] = false; | 178 | 2 | } | 179 | 3 | } | 180 | | | 181 | 3 | if (block.get_by_position(result).type->is_nullable()) { | 182 | 1 | block.replace_by_position( | 183 | 1 | result, ColumnNullable::create(std::move(res), std::move(col_null_map_to))); | 184 | 2 | } else { | 185 | 2 | block.replace_by_position(result, std::move(res)); | 186 | 2 | } | 187 | 3 | return Status::OK(); | 188 | 3 | } |
|
189 | | }; |
190 | | |
191 | | } // namespace doris |
192 | | |
193 | | #include "common/compile_check_end.h" |