be/src/exprs/function/function_string_basic.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <string>
21 | | |
22 | | #include "common/status.h" |
23 | | #include "core/assert_cast.h" |
24 | | #include "core/block/block.h" |
25 | | #include "core/block/column_numbers.h" |
26 | | #include "core/column/column_const.h" |
27 | | #include "core/column/column_nullable.h" |
28 | | #include "core/column/column_string.h" |
29 | | #include "core/column/column_vector.h" |
30 | | #include "core/data_type/data_type_nullable.h" |
31 | | #include "core/data_type/data_type_number.h" |
32 | | #include "core/data_type/data_type_string.h" |
33 | | #include "core/string_ref.h" |
34 | | #include "exec/common/stringop_substring.h" |
35 | | #include "exec/common/template_helpers.hpp" |
36 | | #include "exec/common/util.hpp" |
37 | | #include "exprs/function/function.h" |
38 | | #include "exprs/function/function_helpers.h" |
39 | | #include "exprs/function/simple_function_factory.h" |
40 | | #include "exprs/function_context.h" |
41 | | #include "util/simd/vstring_function.h" |
42 | | |
43 | | namespace doris { |
44 | | #include "common/compile_check_avoid_begin.h" |
45 | | class FunctionStrcmp : public IFunction { |
46 | | public: |
47 | | static constexpr auto name = "strcmp"; |
48 | | |
49 | 21 | static FunctionPtr create() { return std::make_shared<FunctionStrcmp>(); } |
50 | | |
51 | 1 | String get_name() const override { return name; } |
52 | | |
53 | 12 | size_t get_number_of_arguments() const override { return 2; } |
54 | | |
55 | 12 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
56 | 12 | return std::make_shared<DataTypeInt8>(); |
57 | 12 | } |
58 | | |
59 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
60 | 10 | uint32_t result, size_t input_rows_count) const override { |
61 | 10 | const auto& [arg0_column, arg0_const] = |
62 | 10 | unpack_if_const(block.get_by_position(arguments[0]).column); |
63 | 10 | const auto& [arg1_column, arg1_const] = |
64 | 10 | unpack_if_const(block.get_by_position(arguments[1]).column); |
65 | | |
66 | 10 | auto result_column = ColumnInt8::create(input_rows_count); |
67 | | |
68 | 10 | if (auto arg0 = check_and_get_column<ColumnString>(arg0_column.get())) { |
69 | 10 | if (auto arg1 = check_and_get_column<ColumnString>(arg1_column.get())) { |
70 | 10 | if (arg0_const) { |
71 | 0 | scalar_vector(arg0->get_data_at(0), *arg1, *result_column); |
72 | 10 | } else if (arg1_const) { |
73 | 0 | vector_scalar(*arg0, arg1->get_data_at(0), *result_column); |
74 | 10 | } else { |
75 | 10 | vector_vector(*arg0, *arg1, *result_column); |
76 | 10 | } |
77 | 10 | } |
78 | 10 | } |
79 | | |
80 | 10 | block.replace_by_position(result, std::move(result_column)); |
81 | 10 | return Status::OK(); |
82 | 10 | } |
83 | | |
84 | | private: |
85 | 0 | static void scalar_vector(const StringRef str, const ColumnString& vec1, ColumnInt8& res) { |
86 | 0 | size_t size = vec1.size(); |
87 | 0 | for (size_t i = 0; i < size; ++i) { |
88 | 0 | res.get_data()[i] = str.compare(vec1.get_data_at(i)); |
89 | 0 | } |
90 | 0 | } |
91 | | |
92 | 0 | static void vector_scalar(const ColumnString& vec0, const StringRef str, ColumnInt8& res) { |
93 | 0 | size_t size = vec0.size(); |
94 | 0 | for (size_t i = 0; i < size; ++i) { |
95 | 0 | res.get_data()[i] = vec0.get_data_at(i).compare(str); |
96 | 0 | } |
97 | 0 | } |
98 | | |
99 | 10 | static void vector_vector(const ColumnString& vec0, const ColumnString& vec1, ColumnInt8& res) { |
100 | 10 | size_t size = vec0.size(); |
101 | 20 | for (size_t i = 0; i < size; ++i) { |
102 | 10 | res.get_data()[i] = vec0.get_data_at(i).compare(vec1.get_data_at(i)); |
103 | 10 | } |
104 | 10 | } |
105 | | }; |
106 | | |
107 | | template <typename Impl> |
108 | | class FunctionSubstring : public IFunction { |
109 | | public: |
110 | | static constexpr auto name = SubstringUtil::name; |
111 | 2 | String get_name() const override { return name; }_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE8get_nameB5cxx11Ev Line | Count | Source | 111 | 1 | String get_name() const override { return name; } |
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE8get_nameB5cxx11Ev Line | Count | Source | 111 | 1 | String get_name() const override { return name; } |
|
112 | 28.4k | static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }_ZN5doris17FunctionSubstringINS_11Substr3ImplEE6createEv Line | Count | Source | 112 | 28.3k | static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); } |
_ZN5doris17FunctionSubstringINS_11Substr2ImplEE6createEv Line | Count | Source | 112 | 94 | static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); } |
|
113 | | |
114 | 28.3k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
115 | 28.3k | return std::make_shared<DataTypeString>(); |
116 | 28.3k | } _ZNK5doris17FunctionSubstringINS_11Substr3ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 114 | 28.3k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 115 | 28.3k | return std::make_shared<DataTypeString>(); | 116 | 28.3k | } |
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 114 | 85 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 115 | 85 | return std::make_shared<DataTypeString>(); | 116 | 85 | } |
|
117 | 28.4k | DataTypes get_variadic_argument_types_impl() const override { |
118 | 28.4k | return Impl::get_variadic_argument_types(); |
119 | 28.4k | } _ZNK5doris17FunctionSubstringINS_11Substr3ImplEE32get_variadic_argument_types_implEv Line | Count | Source | 117 | 28.3k | DataTypes get_variadic_argument_types_impl() const override { | 118 | 28.3k | return Impl::get_variadic_argument_types(); | 119 | 28.3k | } |
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE32get_variadic_argument_types_implEv Line | Count | Source | 117 | 93 | DataTypes get_variadic_argument_types_impl() const override { | 118 | 93 | return Impl::get_variadic_argument_types(); | 119 | 93 | } |
|
120 | 28.3k | size_t get_number_of_arguments() const override { |
121 | 28.3k | return get_variadic_argument_types_impl().size(); |
122 | 28.3k | } _ZNK5doris17FunctionSubstringINS_11Substr3ImplEE23get_number_of_argumentsEv Line | Count | Source | 120 | 28.3k | size_t get_number_of_arguments() const override { | 121 | 28.3k | return get_variadic_argument_types_impl().size(); | 122 | 28.3k | } |
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE23get_number_of_argumentsEv Line | Count | Source | 120 | 85 | size_t get_number_of_arguments() const override { | 121 | 85 | return get_variadic_argument_types_impl().size(); | 122 | 85 | } |
|
123 | | |
124 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
125 | 28.0k | uint32_t result, size_t input_rows_count) const override { |
126 | 28.0k | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
127 | 28.0k | } _ZNK5doris17FunctionSubstringINS_11Substr3ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 125 | 28.0k | uint32_t result, size_t input_rows_count) const override { | 126 | 28.0k | return Impl::execute_impl(context, block, arguments, result, input_rows_count); | 127 | 28.0k | } |
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 125 | 55 | uint32_t result, size_t input_rows_count) const override { | 126 | 55 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); | 127 | 55 | } |
|
128 | | }; |
129 | | |
130 | | struct Substr3Impl { |
131 | 28.3k | static DataTypes get_variadic_argument_types() { |
132 | 28.3k | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>(), |
133 | 28.3k | std::make_shared<DataTypeInt32>()}; |
134 | 28.3k | } |
135 | | |
136 | | static Status execute_impl(FunctionContext* context, Block& block, |
137 | | const ColumnNumbers& arguments, uint32_t result, |
138 | 28.0k | size_t input_rows_count) { |
139 | 28.0k | SubstringUtil::substring_execute(block, arguments, result, input_rows_count); |
140 | 28.0k | return Status::OK(); |
141 | 28.0k | } |
142 | | }; |
143 | | |
144 | | struct Substr2Impl { |
145 | 93 | static DataTypes get_variadic_argument_types() { |
146 | 93 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>()}; |
147 | 93 | } |
148 | | |
149 | | static Status execute_impl(FunctionContext* context, Block& block, |
150 | | const ColumnNumbers& arguments, uint32_t result, |
151 | 55 | size_t input_rows_count) { |
152 | 55 | auto col_len = ColumnInt32::create(input_rows_count); |
153 | 55 | auto& strlen_data = col_len->get_data(); |
154 | | |
155 | 55 | ColumnPtr str_col; |
156 | 55 | bool str_const; |
157 | 55 | std::tie(str_col, str_const) = unpack_if_const(block.get_by_position(arguments[0]).column); |
158 | | |
159 | 55 | const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets(); |
160 | | |
161 | 55 | if (str_const) { |
162 | 18 | std::fill(strlen_data.begin(), strlen_data.end(), str_offset[0] - str_offset[-1]); |
163 | 37 | } else { |
164 | 101 | for (int i = 0; i < input_rows_count; ++i) { |
165 | 64 | strlen_data[i] = str_offset[i] - str_offset[i - 1]; |
166 | 64 | } |
167 | 37 | } |
168 | | |
169 | | // we complete the column2(strlen) with the default value - each row's strlen. |
170 | 55 | block.insert({std::move(col_len), std::make_shared<DataTypeInt32>(), "strlen"}); |
171 | 55 | ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1}; |
172 | | |
173 | 55 | SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count); |
174 | 55 | return Status::OK(); |
175 | 55 | } |
176 | | }; |
177 | | |
178 | | class FunctionLeft : public IFunction { |
179 | | public: |
180 | | static constexpr auto name = "left"; |
181 | 309 | static FunctionPtr create() { return std::make_shared<FunctionLeft>(); } |
182 | 1 | String get_name() const override { return name; } |
183 | 300 | size_t get_number_of_arguments() const override { return 2; } |
184 | 300 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
185 | 300 | return std::make_shared<DataTypeString>(); |
186 | 300 | } |
187 | | |
188 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
189 | 304 | uint32_t result, size_t input_rows_count) const override { |
190 | 304 | DCHECK_EQ(arguments.size(), 2); |
191 | 304 | auto res = ColumnString::create(); |
192 | 304 | bool col_const[2]; |
193 | 304 | ColumnPtr argument_columns[2]; |
194 | 911 | for (int i = 0; i < 2; ++i) { |
195 | 607 | std::tie(argument_columns[i], col_const[i]) = |
196 | 607 | unpack_if_const(block.get_by_position(arguments[i]).column); |
197 | 607 | } |
198 | | |
199 | 304 | const auto& str_col = assert_cast<const ColumnString&>(*argument_columns[0]); |
200 | 304 | const auto& len_col = assert_cast<const ColumnInt32&>(*argument_columns[1]); |
201 | 304 | const auto is_ascii = str_col.is_ascii(); |
202 | | |
203 | 304 | std::visit( |
204 | 304 | [&](auto is_ascii, auto str_const, auto len_const) { |
205 | 304 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, |
206 | 304 | input_rows_count); |
207 | 304 | }, _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 204 | 66 | [&](auto is_ascii, auto str_const, auto len_const) { | 205 | 66 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 206 | 66 | input_rows_count); | 207 | 66 | }, |
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Line | Count | Source | 204 | 8 | [&](auto is_ascii, auto str_const, auto len_const) { | 205 | 8 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 206 | 8 | input_rows_count); | 207 | 8 | }, |
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 204 | 8 | [&](auto is_ascii, auto str_const, auto len_const) { | 205 | 8 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 206 | 8 | input_rows_count); | 207 | 8 | }, |
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Line | Count | Source | 204 | 106 | [&](auto is_ascii, auto str_const, auto len_const) { | 205 | 106 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 206 | 106 | input_rows_count); | 207 | 106 | }, |
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Line | Count | Source | 204 | 80 | [&](auto is_ascii, auto str_const, auto len_const) { | 205 | 80 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 206 | 80 | input_rows_count); | 207 | 80 | }, |
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 204 | 36 | [&](auto is_ascii, auto str_const, auto len_const) { | 205 | 36 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 206 | 36 | input_rows_count); | 207 | 36 | }, |
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ |
208 | 304 | make_bool_variant(is_ascii), make_bool_variant(col_const[0]), |
209 | 304 | make_bool_variant(col_const[1])); |
210 | | |
211 | 304 | block.get_by_position(result).column = std::move(res); |
212 | 304 | return Status::OK(); |
213 | 304 | } |
214 | | |
215 | | template <bool is_ascii, bool str_const, bool len_const> |
216 | | static void _execute(const ColumnString& str_col, const ColumnInt32& len_col, ColumnString& res, |
217 | 304 | size_t size) { |
218 | 304 | auto& res_chars = res.get_chars(); |
219 | 304 | auto& res_offsets = res.get_offsets(); |
220 | 304 | res_offsets.resize(size); |
221 | 304 | const auto& len_data = len_col.get_data(); |
222 | | |
223 | 304 | if constexpr (str_const) { |
224 | 44 | res_chars.reserve(size * (str_col.get_chars().size())); |
225 | 260 | } else { |
226 | 260 | res_chars.reserve(str_col.get_chars().size()); |
227 | 260 | } |
228 | | |
229 | 806 | for (int i = 0; i < size; ++i) { |
230 | 502 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); |
231 | 502 | int len = len_data[index_check_const<len_const>(i)]; |
232 | 502 | if (len <= 0 || str.empty()) { |
233 | 92 | StringOP::push_empty_string(i, res_chars, res_offsets); |
234 | 92 | continue; |
235 | 92 | } |
236 | | |
237 | 410 | const char* begin = str.begin(); |
238 | 410 | const char* p = begin; |
239 | | |
240 | 410 | if constexpr (is_ascii) { |
241 | 322 | p = begin + std::min(len, static_cast<int>(str.size)); |
242 | 322 | } else { |
243 | 88 | const char* end = str.end(); |
244 | 604 | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { |
245 | 516 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; |
246 | 516 | } |
247 | 88 | } |
248 | | |
249 | 410 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, |
250 | 410 | res_offsets); |
251 | 410 | } |
252 | 304 | } _ZN5doris12FunctionLeft8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 217 | 66 | size_t size) { | 218 | 66 | auto& res_chars = res.get_chars(); | 219 | 66 | auto& res_offsets = res.get_offsets(); | 220 | 66 | res_offsets.resize(size); | 221 | 66 | const auto& len_data = len_col.get_data(); | 222 | | | 223 | | if constexpr (str_const) { | 224 | | res_chars.reserve(size * (str_col.get_chars().size())); | 225 | 66 | } else { | 226 | 66 | res_chars.reserve(str_col.get_chars().size()); | 227 | 66 | } | 228 | | | 229 | 186 | for (int i = 0; i < size; ++i) { | 230 | 120 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 231 | 120 | int len = len_data[index_check_const<len_const>(i)]; | 232 | 120 | if (len <= 0 || str.empty()) { | 233 | 46 | StringOP::push_empty_string(i, res_chars, res_offsets); | 234 | 46 | continue; | 235 | 46 | } | 236 | | | 237 | 74 | const char* begin = str.begin(); | 238 | 74 | const char* p = begin; | 239 | | | 240 | | if constexpr (is_ascii) { | 241 | | p = begin + std::min(len, static_cast<int>(str.size)); | 242 | 74 | } else { | 243 | 74 | const char* end = str.end(); | 244 | 522 | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 245 | 448 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 246 | 448 | } | 247 | 74 | } | 248 | | | 249 | 74 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 250 | 74 | res_offsets); | 251 | 74 | } | 252 | 66 | } |
_ZN5doris12FunctionLeft8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 217 | 8 | size_t size) { | 218 | 8 | auto& res_chars = res.get_chars(); | 219 | 8 | auto& res_offsets = res.get_offsets(); | 220 | 8 | res_offsets.resize(size); | 221 | 8 | const auto& len_data = len_col.get_data(); | 222 | | | 223 | | if constexpr (str_const) { | 224 | | res_chars.reserve(size * (str_col.get_chars().size())); | 225 | 8 | } else { | 226 | 8 | res_chars.reserve(str_col.get_chars().size()); | 227 | 8 | } | 228 | | | 229 | 16 | for (int i = 0; i < size; ++i) { | 230 | 8 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 231 | 8 | int len = len_data[index_check_const<len_const>(i)]; | 232 | 8 | if (len <= 0 || str.empty()) { | 233 | 1 | StringOP::push_empty_string(i, res_chars, res_offsets); | 234 | 1 | continue; | 235 | 1 | } | 236 | | | 237 | 7 | const char* begin = str.begin(); | 238 | 7 | const char* p = begin; | 239 | | | 240 | | if constexpr (is_ascii) { | 241 | | p = begin + std::min(len, static_cast<int>(str.size)); | 242 | 7 | } else { | 243 | 7 | const char* end = str.end(); | 244 | 41 | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 245 | 34 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 246 | 34 | } | 247 | 7 | } | 248 | | | 249 | 7 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 250 | 7 | res_offsets); | 251 | 7 | } | 252 | 8 | } |
_ZN5doris12FunctionLeft8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 217 | 8 | size_t size) { | 218 | 8 | auto& res_chars = res.get_chars(); | 219 | 8 | auto& res_offsets = res.get_offsets(); | 220 | 8 | res_offsets.resize(size); | 221 | 8 | const auto& len_data = len_col.get_data(); | 222 | | | 223 | 8 | if constexpr (str_const) { | 224 | 8 | res_chars.reserve(size * (str_col.get_chars().size())); | 225 | | } else { | 226 | | res_chars.reserve(str_col.get_chars().size()); | 227 | | } | 228 | | | 229 | 16 | for (int i = 0; i < size; ++i) { | 230 | 8 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 231 | 8 | int len = len_data[index_check_const<len_const>(i)]; | 232 | 8 | if (len <= 0 || str.empty()) { | 233 | 1 | StringOP::push_empty_string(i, res_chars, res_offsets); | 234 | 1 | continue; | 235 | 1 | } | 236 | | | 237 | 7 | const char* begin = str.begin(); | 238 | 7 | const char* p = begin; | 239 | | | 240 | | if constexpr (is_ascii) { | 241 | | p = begin + std::min(len, static_cast<int>(str.size)); | 242 | 7 | } else { | 243 | 7 | const char* end = str.end(); | 244 | 41 | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 245 | 34 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 246 | 34 | } | 247 | 7 | } | 248 | | | 249 | 7 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 250 | 7 | res_offsets); | 251 | 7 | } | 252 | 8 | } |
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m _ZN5doris12FunctionLeft8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 217 | 106 | size_t size) { | 218 | 106 | auto& res_chars = res.get_chars(); | 219 | 106 | auto& res_offsets = res.get_offsets(); | 220 | 106 | res_offsets.resize(size); | 221 | 106 | const auto& len_data = len_col.get_data(); | 222 | | | 223 | | if constexpr (str_const) { | 224 | | res_chars.reserve(size * (str_col.get_chars().size())); | 225 | 106 | } else { | 226 | 106 | res_chars.reserve(str_col.get_chars().size()); | 227 | 106 | } | 228 | | | 229 | 280 | for (int i = 0; i < size; ++i) { | 230 | 174 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 231 | 174 | int len = len_data[index_check_const<len_const>(i)]; | 232 | 174 | if (len <= 0 || str.empty()) { | 233 | 24 | StringOP::push_empty_string(i, res_chars, res_offsets); | 234 | 24 | continue; | 235 | 24 | } | 236 | | | 237 | 150 | const char* begin = str.begin(); | 238 | 150 | const char* p = begin; | 239 | | | 240 | 150 | if constexpr (is_ascii) { | 241 | 150 | p = begin + std::min(len, static_cast<int>(str.size)); | 242 | | } else { | 243 | | const char* end = str.end(); | 244 | | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 245 | | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 246 | | } | 247 | | } | 248 | | | 249 | 150 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 250 | 150 | res_offsets); | 251 | 150 | } | 252 | 106 | } |
_ZN5doris12FunctionLeft8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 217 | 80 | size_t size) { | 218 | 80 | auto& res_chars = res.get_chars(); | 219 | 80 | auto& res_offsets = res.get_offsets(); | 220 | 80 | res_offsets.resize(size); | 221 | 80 | const auto& len_data = len_col.get_data(); | 222 | | | 223 | | if constexpr (str_const) { | 224 | | res_chars.reserve(size * (str_col.get_chars().size())); | 225 | 80 | } else { | 226 | 80 | res_chars.reserve(str_col.get_chars().size()); | 227 | 80 | } | 228 | | | 229 | 236 | for (int i = 0; i < size; ++i) { | 230 | 156 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 231 | 156 | int len = len_data[index_check_const<len_const>(i)]; | 232 | 156 | if (len <= 0 || str.empty()) { | 233 | 10 | StringOP::push_empty_string(i, res_chars, res_offsets); | 234 | 10 | continue; | 235 | 10 | } | 236 | | | 237 | 146 | const char* begin = str.begin(); | 238 | 146 | const char* p = begin; | 239 | | | 240 | 146 | if constexpr (is_ascii) { | 241 | 146 | p = begin + std::min(len, static_cast<int>(str.size)); | 242 | | } else { | 243 | | const char* end = str.end(); | 244 | | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 245 | | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 246 | | } | 247 | | } | 248 | | | 249 | 146 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 250 | 146 | res_offsets); | 251 | 146 | } | 252 | 80 | } |
_ZN5doris12FunctionLeft8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 217 | 36 | size_t size) { | 218 | 36 | auto& res_chars = res.get_chars(); | 219 | 36 | auto& res_offsets = res.get_offsets(); | 220 | 36 | res_offsets.resize(size); | 221 | 36 | const auto& len_data = len_col.get_data(); | 222 | | | 223 | 36 | if constexpr (str_const) { | 224 | 36 | res_chars.reserve(size * (str_col.get_chars().size())); | 225 | | } else { | 226 | | res_chars.reserve(str_col.get_chars().size()); | 227 | | } | 228 | | | 229 | 72 | for (int i = 0; i < size; ++i) { | 230 | 36 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 231 | 36 | int len = len_data[index_check_const<len_const>(i)]; | 232 | 36 | if (len <= 0 || str.empty()) { | 233 | 10 | StringOP::push_empty_string(i, res_chars, res_offsets); | 234 | 10 | continue; | 235 | 10 | } | 236 | | | 237 | 26 | const char* begin = str.begin(); | 238 | 26 | const char* p = begin; | 239 | | | 240 | 26 | if constexpr (is_ascii) { | 241 | 26 | p = begin + std::min(len, static_cast<int>(str.size)); | 242 | | } else { | 243 | | const char* end = str.end(); | 244 | | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 245 | | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 246 | | } | 247 | | } | 248 | | | 249 | 26 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 250 | 26 | res_offsets); | 251 | 26 | } | 252 | 36 | } |
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m |
253 | | }; |
254 | | |
255 | | class FunctionRight : public IFunction { |
256 | | public: |
257 | | static constexpr auto name = "right"; |
258 | 210 | static FunctionPtr create() { return std::make_shared<FunctionRight>(); } |
259 | 1 | String get_name() const override { return name; } |
260 | 201 | size_t get_number_of_arguments() const override { return 2; } |
261 | 201 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
262 | 201 | return std::make_shared<DataTypeString>(); |
263 | 201 | } |
264 | | |
265 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
266 | 221 | uint32_t result, size_t input_rows_count) const override { |
267 | 221 | auto int_type = std::make_shared<DataTypeInt32>(); |
268 | 221 | auto params1 = ColumnInt32::create(input_rows_count); |
269 | 221 | auto params2 = ColumnInt32::create(input_rows_count); |
270 | 221 | size_t num_columns_without_result = block.columns(); |
271 | | |
272 | | // params1 = max(arg[1], -len(arg)) |
273 | 221 | auto& index_data = params1->get_data(); |
274 | 221 | auto& strlen_data = params2->get_data(); |
275 | | |
276 | 221 | auto str_col = |
277 | 221 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
278 | 221 | const auto* str_column = assert_cast<const ColumnString*>(str_col.get()); |
279 | 221 | auto pos_col = |
280 | 221 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
281 | 221 | const auto& pos_data = assert_cast<const ColumnInt32*>(pos_col.get())->get_data(); |
282 | | |
283 | 604 | for (int i = 0; i < input_rows_count; ++i) { |
284 | 383 | auto str = str_column->get_data_at(i); |
285 | 383 | strlen_data[i] = simd::VStringFunctions::get_char_len(str.data, str.size); |
286 | 383 | } |
287 | | |
288 | 604 | for (int i = 0; i < input_rows_count; ++i) { |
289 | 383 | index_data[i] = std::max(-pos_data[i], -strlen_data[i]); |
290 | 383 | } |
291 | | |
292 | 221 | block.insert({std::move(params1), int_type, "index"}); |
293 | 221 | block.insert({std::move(params2), int_type, "strlen"}); |
294 | | |
295 | 221 | ColumnNumbers temp_arguments(3); |
296 | 221 | temp_arguments[0] = arguments[0]; |
297 | 221 | temp_arguments[1] = num_columns_without_result; |
298 | 221 | temp_arguments[2] = num_columns_without_result + 1; |
299 | 221 | SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count); |
300 | 221 | return Status::OK(); |
301 | 221 | } |
302 | | }; |
303 | | |
304 | | struct NullOrEmptyImpl { |
305 | 0 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeUInt8>()}; } |
306 | | |
307 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
308 | 95 | uint32_t result, size_t input_rows_count, bool reverse) { |
309 | 95 | auto res_map = ColumnUInt8::create(input_rows_count, 0); |
310 | | |
311 | 95 | auto column = block.get_by_position(arguments[0]).column; |
312 | 95 | if (auto* nullable = check_and_get_column<const ColumnNullable>(*column)) { |
313 | 49 | column = nullable->get_nested_column_ptr(); |
314 | 49 | VectorizedUtils::update_null_map(res_map->get_data(), nullable->get_null_map_data()); |
315 | 49 | } |
316 | 95 | auto str_col = assert_cast<const ColumnString*>(column.get()); |
317 | 95 | const auto& offsets = str_col->get_offsets(); |
318 | | |
319 | 95 | auto& res_map_data = res_map->get_data(); |
320 | 324 | for (int i = 0; i < input_rows_count; ++i) { |
321 | 229 | int size = offsets[i] - offsets[i - 1]; |
322 | 229 | res_map_data[i] |= (size == 0); |
323 | 229 | } |
324 | 95 | if (reverse) { |
325 | 196 | for (int i = 0; i < input_rows_count; ++i) { |
326 | 143 | res_map_data[i] = !res_map_data[i]; |
327 | 143 | } |
328 | 53 | } |
329 | | |
330 | 95 | block.replace_by_position(result, std::move(res_map)); |
331 | 95 | return Status::OK(); |
332 | 95 | } |
333 | | }; |
334 | | |
335 | | class FunctionNullOrEmpty : public IFunction { |
336 | | public: |
337 | | static constexpr auto name = "null_or_empty"; |
338 | 38 | static FunctionPtr create() { return std::make_shared<FunctionNullOrEmpty>(); } |
339 | 1 | String get_name() const override { return name; } |
340 | 29 | size_t get_number_of_arguments() const override { return 1; } |
341 | | |
342 | 29 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
343 | 29 | return std::make_shared<DataTypeUInt8>(); |
344 | 29 | } |
345 | | |
346 | 71 | bool use_default_implementation_for_nulls() const override { return false; } |
347 | | |
348 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
349 | 42 | uint32_t result, size_t input_rows_count) const override { |
350 | 42 | RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result, |
351 | 42 | input_rows_count, false)); |
352 | 42 | return Status::OK(); |
353 | 42 | } |
354 | | }; |
355 | | |
356 | | class FunctionNotNullOrEmpty : public IFunction { |
357 | | public: |
358 | | static constexpr auto name = "not_null_or_empty"; |
359 | 50 | static FunctionPtr create() { return std::make_shared<FunctionNotNullOrEmpty>(); } |
360 | 1 | String get_name() const override { return name; } |
361 | 41 | size_t get_number_of_arguments() const override { return 1; } |
362 | | |
363 | 41 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
364 | 41 | return std::make_shared<DataTypeUInt8>(); |
365 | 41 | } |
366 | | |
367 | 94 | bool use_default_implementation_for_nulls() const override { return false; } |
368 | | |
369 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
370 | 53 | uint32_t result, size_t input_rows_count) const override { |
371 | 53 | RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result, |
372 | 53 | input_rows_count, true)); |
373 | 53 | return Status::OK(); |
374 | 53 | } |
375 | | }; |
376 | | |
377 | 8 | void register_function_string_basic(SimpleFunctionFactory& factory) { |
378 | 8 | factory.register_function<FunctionSubstring<Substr3Impl>>(); |
379 | 8 | factory.register_function<FunctionSubstring<Substr2Impl>>(); |
380 | 8 | factory.register_function<FunctionLeft>(); |
381 | 8 | factory.register_function<FunctionRight>(); |
382 | 8 | factory.register_function<FunctionNullOrEmpty>(); |
383 | 8 | factory.register_function<FunctionNotNullOrEmpty>(); |
384 | 8 | factory.register_function<FunctionStrcmp>(); |
385 | | |
386 | 8 | factory.register_alias(FunctionLeft::name, "strleft"); |
387 | 8 | factory.register_alias(FunctionRight::name, "strright"); |
388 | 8 | factory.register_alias(SubstringUtil::name, "substr"); |
389 | 8 | factory.register_alias(SubstringUtil::name, "mid"); |
390 | 8 | } |
391 | | |
392 | | #include "common/compile_check_avoid_end.h" |
393 | | } // namespace doris |