be/src/exprs/function/function_collection_in.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | // This file is copied from |
18 | | |
19 | | #pragma once |
20 | | |
21 | | #include <glog/logging.h> |
22 | | #include <stddef.h> |
23 | | |
24 | | #include <algorithm> |
25 | | #include <memory> |
26 | | #include <unordered_set> |
27 | | #include <utility> |
28 | | #include <vector> |
29 | | |
30 | | #include "common/status.h" |
31 | | #include "core/block/block.h" |
32 | | #include "core/column/column.h" |
33 | | #include "core/column/column_const.h" |
34 | | #include "core/column/column_nullable.h" |
35 | | #include "core/column/column_struct.h" |
36 | | #include "core/column/column_vector.h" |
37 | | #include "core/data_type/data_type_factory.hpp" |
38 | | #include "core/data_type/data_type_nullable.h" |
39 | | #include "core/data_type/data_type_number.h" |
40 | | #include "exprs/function/function.h" |
41 | | |
42 | | namespace doris { |
43 | | struct ColumnRowRef { |
44 | | ENABLE_FACTORY_CREATOR(ColumnRowRef); |
45 | | ColumnPtr column; |
46 | | size_t row_idx; |
47 | | |
48 | | // equals when call set insert, this operator will be used |
49 | 0 | bool operator==(const ColumnRowRef& other) const { |
50 | 0 | return column->compare_at(row_idx, other.row_idx, *other.column, 0) == 0; |
51 | 0 | } |
52 | | // compare |
53 | 0 | bool operator<(const ColumnRowRef& other) const { |
54 | 0 | return column->compare_at(row_idx, other.row_idx, *other.column, 0) < 0; |
55 | 0 | } |
56 | | |
57 | | // when call set find, will use hash to find |
58 | 0 | size_t operator()(const ColumnRowRef& a) const { |
59 | 0 | uint32_t hash_val = 0; |
60 | 0 | a.column->update_crc_with_value(a.row_idx, a.row_idx + 1, hash_val, nullptr); |
61 | 0 | return hash_val; |
62 | 0 | } |
63 | | }; |
64 | | |
65 | | struct CollectionInState { |
66 | | ENABLE_FACTORY_CREATOR(CollectionInState) |
67 | | std::unordered_set<ColumnRowRef, ColumnRowRef> args_set; |
68 | | bool null_in_set = false; |
69 | | }; |
70 | | |
71 | | template <bool negative> |
72 | | class FunctionCollectionIn : public IFunction { |
73 | | public: |
74 | | static constexpr auto name = negative ? "collection_not_in" : "collection_in"; |
75 | | |
76 | 4 | static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); }_ZN5doris20FunctionCollectionInILb0EE6createEv Line | Count | Source | 76 | 2 | static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); } |
_ZN5doris20FunctionCollectionInILb1EE6createEv Line | Count | Source | 76 | 2 | static FunctionPtr create() { return std::make_shared<FunctionCollectionIn>(); } |
|
77 | | |
78 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb1EE8get_nameB5cxx11Ev |
79 | | |
80 | 2 | bool is_variadic() const override { return true; }_ZNK5doris20FunctionCollectionInILb0EE11is_variadicEv Line | Count | Source | 80 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris20FunctionCollectionInILb1EE11is_variadicEv Line | Count | Source | 80 | 1 | bool is_variadic() const override { return true; } |
|
81 | | |
82 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb1EE23get_number_of_argumentsEv |
83 | | |
84 | 0 | DataTypePtr get_return_type_impl(const DataTypes& args) const override { |
85 | 0 | for (const auto& arg : args) { |
86 | 0 | if (arg->is_nullable()) { |
87 | 0 | return make_nullable(std::make_shared<DataTypeUInt8>()); |
88 | 0 | } |
89 | 0 | } |
90 | 0 | return std::make_shared<DataTypeUInt8>(); |
91 | 0 | } Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE |
92 | | |
93 | 0 | bool use_default_implementation_for_nulls() const override { return false; }Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb0EE36use_default_implementation_for_nullsEv Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb1EE36use_default_implementation_for_nullsEv |
94 | | |
95 | | // make data in context into a set |
96 | 0 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
97 | 0 | if (scope == FunctionContext::THREAD_LOCAL) { |
98 | 0 | return Status::OK(); |
99 | 0 | } |
100 | 0 | int num_args = context->get_num_args(); |
101 | 0 | DCHECK(num_args >= 1); |
102 | |
|
103 | 0 | std::shared_ptr<CollectionInState> state = std::make_shared<CollectionInState>(); |
104 | 0 | context->set_function_state(scope, state); |
105 | |
|
106 | 0 | DataTypePtr args_type = remove_nullable(context->get_arg_type(0)); |
107 | 0 | MutableColumnPtr args_column_ptr = args_type->create_column(); |
108 | |
|
109 | 0 | for (int i = 1; i < num_args; i++) { |
110 | | // FE should make element type consistent and |
111 | | // equalize the length of the elements in struct |
112 | 0 | const auto& const_column_ptr = context->get_constant_col(i); |
113 | | // Types like struct, array, and map only support constant expressions. |
114 | 0 | DCHECK(const_column_ptr != nullptr); |
115 | 0 | const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr); |
116 | 0 | if (col->is_nullable()) { |
117 | 0 | const auto* null_col = check_and_get_column<ColumnNullable>(col.get()); |
118 | 0 | if (null_col->has_null()) { |
119 | 0 | state->null_in_set = true; |
120 | 0 | } else { |
121 | 0 | args_column_ptr->insert_from(null_col->get_nested_column(), 0); |
122 | 0 | } |
123 | 0 | } else { |
124 | 0 | args_column_ptr->insert_from(*col, 0); |
125 | 0 | } |
126 | 0 | } |
127 | 0 | ColumnPtr column_ptr = std::move(args_column_ptr); |
128 | | // make collection ref into set |
129 | 0 | auto col_size = column_ptr->size(); |
130 | 0 | for (size_t i = 0; i < col_size; i++) { |
131 | 0 | state->args_set.insert({column_ptr, i}); |
132 | 0 | } |
133 | |
|
134 | 0 | return Status::OK(); |
135 | 0 | } Unexecuted instantiation: _ZN5doris20FunctionCollectionInILb0EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE Unexecuted instantiation: _ZN5doris20FunctionCollectionInILb1EE4openEPNS_15FunctionContextENS2_18FunctionStateScopeE |
136 | | |
137 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
138 | 0 | uint32_t result, size_t input_rows_count) const override { |
139 | 0 | auto in_state = reinterpret_cast<CollectionInState*>( |
140 | 0 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
141 | 0 | if (!in_state) { |
142 | 0 | return Status::RuntimeError("function context for function '{}' must have Set;", |
143 | 0 | get_name()); |
144 | 0 | } |
145 | 0 | const auto& args_set = in_state->args_set; |
146 | 0 | const bool null_in_set = in_state->null_in_set; |
147 | 0 | auto res = ColumnUInt8::create(); |
148 | 0 | ColumnUInt8::Container& vec_res = res->get_data(); |
149 | 0 | vec_res.resize(input_rows_count); |
150 | |
|
151 | 0 | ColumnUInt8::MutablePtr col_null_map_to; |
152 | 0 | col_null_map_to = ColumnUInt8::create(input_rows_count, false); |
153 | 0 | auto& vec_null_map_to = col_null_map_to->get_data(); |
154 | |
|
155 | 0 | const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]); |
156 | 0 | const auto& [materialized_column, col_const] = unpack_if_const(left_arg.column); |
157 | 0 | auto materialized_column_not_null = materialized_column; |
158 | 0 | if (materialized_column_not_null->is_nullable()) { |
159 | 0 | materialized_column_not_null = assert_cast<ColumnPtr>( |
160 | 0 | check_and_get_column<ColumnNullable>(materialized_column_not_null.get()) |
161 | 0 | ->get_nested_column_ptr()); |
162 | 0 | } |
163 | |
|
164 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
165 | 0 | bool find = args_set.find({materialized_column_not_null, i}) != args_set.end(); |
166 | |
|
167 | 0 | if constexpr (negative) { |
168 | 0 | vec_res[i] = !find; |
169 | 0 | } else { |
170 | 0 | vec_res[i] = find; |
171 | 0 | } |
172 | |
|
173 | 0 | if (null_in_set) { |
174 | 0 | vec_null_map_to[i] = negative == vec_res[i]; |
175 | 0 | } else { |
176 | 0 | vec_null_map_to[i] = false; |
177 | 0 | } |
178 | 0 | } |
179 | |
|
180 | 0 | if (block.get_by_position(result).type->is_nullable()) { |
181 | 0 | block.replace_by_position( |
182 | 0 | result, ColumnNullable::create(std::move(res), std::move(col_null_map_to))); |
183 | 0 | } else { |
184 | 0 | block.replace_by_position(result, std::move(res)); |
185 | 0 | } |
186 | 0 | return Status::OK(); |
187 | 0 | } Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris20FunctionCollectionInILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
188 | | }; |
189 | | |
190 | | } // namespace doris |