be/src/exprs/function/function_string.cpp
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #include <ctype.h> |
19 | | #include <math.h> |
20 | | #include <re2/stringpiece.h> |
21 | | #include <unicode/schriter.h> |
22 | | #include <unicode/uchar.h> |
23 | | #include <unicode/unistr.h> |
24 | | #include <unicode/ustream.h> |
25 | | |
26 | | #include <bitset> |
27 | | #include <compare> |
28 | | #include <cstddef> |
29 | | #include <cstdint> |
30 | | #include <limits> |
31 | | #include <optional> |
32 | | #include <string> |
33 | | #include <string_view> |
34 | | |
35 | | #include "common/cast_set.h" |
36 | | #include "common/status.h" |
37 | | #include "core/column/column.h" |
38 | | #include "core/column/column_string.h" |
39 | | #include "core/data_type/data_type_nullable.h" |
40 | | #include "core/pod_array_fwd.h" |
41 | | #include "core/string_ref.h" |
42 | | #include "exprs/expr_zonemap_filter.h" |
43 | | #include "exprs/function/function_reverse.h" |
44 | | #include "exprs/function/function_string_concat.h" |
45 | | #include "exprs/function/function_string_format.h" |
46 | | #include "exprs/function/function_string_replace.h" |
47 | | #include "exprs/function/function_string_to_string.h" |
48 | | #include "exprs/function/function_totype.h" |
49 | | #include "exprs/function/simple_function_factory.h" |
50 | | #include "exprs/function/string_hex_util.h" |
51 | | #include "util/string_search.hpp" |
52 | | #include "util/url_coding.h" |
53 | | #include "util/utf8_check.h" |
54 | | |
55 | | namespace doris { |
56 | | struct NameStringASCII { |
57 | | static constexpr auto name = "ascii"; |
58 | | }; |
59 | | |
60 | | struct StringASCII { |
61 | | using ReturnType = DataTypeInt32; |
62 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING; |
63 | | using Type = String; |
64 | | using ReturnColumnType = ColumnInt32; |
65 | | |
66 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
67 | 54 | PaddedPODArray<Int32>& res) { |
68 | 54 | auto size = offsets.size(); |
69 | 54 | res.resize(size); |
70 | 152 | for (int i = 0; i < size; ++i) { |
71 | 98 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
72 | 98 | res[i] = (offsets[i] == offsets[i - 1]) ? 0 : static_cast<uint8_t>(raw_str[0]); |
73 | 98 | } |
74 | 54 | return Status::OK(); |
75 | 54 | } |
76 | | }; |
77 | | |
78 | | struct NameParseDataSize { |
79 | | static constexpr auto name = "parse_data_size"; |
80 | | }; |
81 | | |
82 | | static const std::map<std::string_view, Int128> UNITS = { |
83 | | {"B", static_cast<Int128>(1)}, {"kB", static_cast<Int128>(1) << 10}, |
84 | | {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30}, |
85 | | {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50}, |
86 | | {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70}, |
87 | | {"YB", static_cast<Int128>(1) << 80}}; |
88 | | |
89 | | struct ParseDataSize { |
90 | | using ReturnType = DataTypeInt128; |
91 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING; |
92 | | using Type = String; |
93 | | using ReturnColumnType = ColumnInt128; |
94 | | |
95 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
96 | 50 | PaddedPODArray<Int128>& res) { |
97 | 50 | auto size = offsets.size(); |
98 | 50 | res.resize(size); |
99 | 104 | for (int i = 0; i < size; ++i) { |
100 | 54 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
101 | 54 | int str_size = offsets[i] - offsets[i - 1]; |
102 | 54 | res[i] = parse_data_size(std::string_view(raw_str, str_size)); |
103 | 54 | } |
104 | 50 | return Status::OK(); |
105 | 50 | } |
106 | | |
107 | 54 | static Int128 parse_data_size(const std::string_view& dataSize) { |
108 | 54 | int digit_length = 0; |
109 | 230 | for (char c : dataSize) { |
110 | 230 | if (isdigit(c) || c == '.') { |
111 | 178 | digit_length++; |
112 | 178 | } else { |
113 | 52 | break; |
114 | 52 | } |
115 | 230 | } |
116 | | |
117 | 54 | if (digit_length == 0) { |
118 | 4 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, |
119 | 4 | "Invalid Input argument \"{}\" of function parse_data_size", |
120 | 4 | dataSize); |
121 | 4 | } |
122 | | // 123.45MB--->123.45 : MB |
123 | 50 | double value = 0.0; |
124 | 50 | try { |
125 | 50 | value = std::stod(std::string(dataSize.substr(0, digit_length))); |
126 | 50 | } catch (const std::exception& e) { |
127 | 0 | throw doris::Exception( |
128 | 0 | ErrorCode::INVALID_ARGUMENT, |
129 | 0 | "Invalid Input argument \"{}\" of function parse_data_size, error: {}", |
130 | 0 | dataSize, e.what()); |
131 | 0 | } |
132 | 50 | auto unit = dataSize.substr(digit_length); |
133 | 50 | auto it = UNITS.find(unit); |
134 | 50 | if (it != UNITS.end()) { |
135 | 47 | return static_cast<__int128>(static_cast<long double>(it->second) * value); |
136 | 47 | } else { |
137 | 3 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, |
138 | 3 | "Invalid Input argument \"{}\" of function parse_data_size", |
139 | 3 | dataSize); |
140 | 3 | } |
141 | 50 | } |
142 | | }; |
143 | | |
144 | | struct NameQuote { |
145 | | static constexpr auto name = "quote"; |
146 | | }; |
147 | | |
148 | | struct NameQuoteImpl { |
149 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
150 | 17 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { |
151 | 17 | size_t offset_size = offsets.size(); |
152 | 17 | ColumnString::Offset pos = 0; |
153 | 17 | res_offsets.resize(offset_size); |
154 | 17 | res_data.resize(data.size() + offset_size * 2); |
155 | 45 | for (int i = 0; i < offset_size; i++) { |
156 | 28 | const unsigned char* raw_str = &data[offsets[i - 1]]; |
157 | 28 | ColumnString::Offset size = offsets[i] - offsets[i - 1]; |
158 | 28 | res_data[pos] = '\''; |
159 | 28 | std::memcpy(res_data.data() + pos + 1, raw_str, size); |
160 | 28 | res_data[pos + size + 1] = '\''; |
161 | 28 | pos += size + 2; |
162 | 28 | res_offsets[i] = pos; |
163 | 28 | } |
164 | 17 | return Status::OK(); |
165 | 17 | } |
166 | | }; |
167 | | |
168 | | struct NameStringLength { |
169 | | static constexpr auto name = "length"; |
170 | | }; |
171 | | |
172 | | struct StringLengthImpl { |
173 | | using ReturnType = DataTypeInt32; |
174 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING; |
175 | | using Type = String; |
176 | | using ReturnColumnType = ColumnInt32; |
177 | | |
178 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
179 | 9.47k | PaddedPODArray<Int32>& res) { |
180 | 9.47k | auto size = offsets.size(); |
181 | 9.47k | res.resize(size); |
182 | 8.29M | for (int i = 0; i < size; ++i) { |
183 | 8.28M | int str_size = offsets[i] - offsets[i - 1]; |
184 | 8.28M | res[i] = str_size; |
185 | 8.28M | } |
186 | 9.47k | return Status::OK(); |
187 | 9.47k | } |
188 | | }; |
189 | | |
190 | | struct NameCrc32 { |
191 | | static constexpr auto name = "crc32"; |
192 | | }; |
193 | | |
194 | | struct Crc32Impl { |
195 | | using ReturnType = DataTypeInt64; |
196 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING; |
197 | | using Type = String; |
198 | | using ReturnColumnType = ColumnInt64; |
199 | | |
200 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
201 | 3 | PaddedPODArray<Int64>& res) { |
202 | 3 | auto size = offsets.size(); |
203 | 3 | res.resize(size); |
204 | 6 | for (int i = 0; i < size; ++i) { |
205 | 3 | res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1], |
206 | 3 | offsets[i] - offsets[i - 1]); |
207 | 3 | } |
208 | 3 | return Status::OK(); |
209 | 3 | } |
210 | | }; |
211 | | |
212 | | struct NameStringUtf8Length { |
213 | | static constexpr auto name = "char_length"; |
214 | | }; |
215 | | |
216 | | struct StringUtf8LengthImpl { |
217 | | using ReturnType = DataTypeInt32; |
218 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING; |
219 | | using Type = String; |
220 | | using ReturnColumnType = ColumnInt32; |
221 | | |
222 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
223 | 58 | PaddedPODArray<Int32>& res) { |
224 | 58 | auto size = offsets.size(); |
225 | 58 | res.resize(size); |
226 | 164 | for (int i = 0; i < size; ++i) { |
227 | 106 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
228 | 106 | int str_size = offsets[i] - offsets[i - 1]; |
229 | 106 | res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size); |
230 | 106 | } |
231 | 58 | return Status::OK(); |
232 | 58 | } |
233 | | }; |
234 | | |
235 | | struct NameIsValidUTF8 { |
236 | | static constexpr auto name = "is_valid_utf8"; |
237 | | }; |
238 | | |
239 | | struct IsValidUTF8Impl { |
240 | | using ReturnType = DataTypeUInt8; |
241 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING; |
242 | | using Type = String; |
243 | | using ReturnColumnType = ColumnUInt8; |
244 | | |
245 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
246 | 39 | PaddedPODArray<UInt8>& res) { |
247 | 39 | auto size = offsets.size(); |
248 | 39 | res.resize(size); |
249 | 98 | for (size_t i = 0; i < size; ++i) { |
250 | 59 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
251 | 59 | size_t str_size = offsets[i] - offsets[i - 1]; |
252 | 59 | res[i] = validate_utf8(raw_str, str_size) ? 1 : 0; |
253 | 59 | } |
254 | 39 | return Status::OK(); |
255 | 39 | } |
256 | | }; |
257 | | |
258 | | struct NameStartsWith { |
259 | | static constexpr auto name = "starts_with"; |
260 | | }; |
261 | | |
262 | | struct StartsWithOp { |
263 | | using ResultDataType = DataTypeUInt8; |
264 | | using ResultPaddedPODArray = PaddedPODArray<UInt8>; |
265 | | |
266 | 11.3k | static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) { |
267 | 11.3k | res = strl.starts_with(strr); |
268 | 11.3k | } |
269 | | }; |
270 | | |
271 | | struct NameEndsWith { |
272 | | static constexpr auto name = "ends_with"; |
273 | | }; |
274 | | |
275 | | struct EndsWithOp { |
276 | | using ResultDataType = DataTypeUInt8; |
277 | | using ResultPaddedPODArray = PaddedPODArray<UInt8>; |
278 | | |
279 | 142 | static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) { |
280 | 142 | res = strl.ends_with(strr); |
281 | 142 | } |
282 | | }; |
283 | | |
284 | | struct NameFindInSet { |
285 | | static constexpr auto name = "find_in_set"; |
286 | | }; |
287 | | |
288 | | struct FindInSetOp { |
289 | | using ResultDataType = DataTypeInt32; |
290 | | using ResultPaddedPODArray = PaddedPODArray<Int32>; |
291 | 170 | static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) { |
292 | 670 | for (const auto& c : strl) { |
293 | 670 | if (c == ',') { |
294 | 21 | res = 0; |
295 | 21 | return; |
296 | 21 | } |
297 | 670 | } |
298 | | |
299 | 149 | int32_t token_index = 1; |
300 | 149 | int32_t start = 0; |
301 | 149 | int32_t end; |
302 | | |
303 | 253 | do { |
304 | 253 | end = start; |
305 | | // Position end. |
306 | 1.05k | while (end < strr.length() && strr[end] != ',') { |
307 | 806 | ++end; |
308 | 806 | } |
309 | | |
310 | 253 | if (strl == std::string_view {strr.data() + start, (size_t)end - start}) { |
311 | 93 | res = token_index; |
312 | 93 | return; |
313 | 93 | } |
314 | | |
315 | | // Re-position start and end past ',' |
316 | 160 | start = end + 1; |
317 | 160 | ++token_index; |
318 | 160 | } while (start < strr.length()); |
319 | 56 | res = 0; |
320 | 56 | } |
321 | | }; |
322 | | |
323 | | struct NameInstr { |
324 | | static constexpr auto name = "instr"; |
325 | | }; |
326 | | |
327 | | // LeftDataType and RightDataType are DataTypeString |
328 | | template <typename LeftDataType, typename RightDataType> |
329 | | struct StringInStrImpl { |
330 | | using ResultDataType = DataTypeInt32; |
331 | | using ResultPaddedPODArray = PaddedPODArray<Int32>; |
332 | | |
333 | | static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata, |
334 | 72 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { |
335 | 72 | StringRef lstr_ref(ldata.data, ldata.size); |
336 | | |
337 | 72 | auto size = roffsets.size(); |
338 | 72 | res.resize(size); |
339 | 144 | for (int i = 0; i < size; ++i) { |
340 | 72 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
341 | 72 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
342 | | |
343 | 72 | StringRef rstr_ref(r_raw_str, r_str_size); |
344 | | |
345 | 72 | res[i] = execute(lstr_ref, rstr_ref); |
346 | 72 | } |
347 | | |
348 | 72 | return Status::OK(); |
349 | 72 | } |
350 | | |
351 | | static Status vector_scalar(const ColumnString::Chars& ldata, |
352 | | const ColumnString::Offsets& loffsets, const StringRef& rdata, |
353 | 86 | ResultPaddedPODArray& res) { |
354 | 86 | auto size = loffsets.size(); |
355 | 86 | res.resize(size); |
356 | | |
357 | 86 | if (rdata.size == 0) { |
358 | 12 | std::fill(res.begin(), res.end(), 1); |
359 | 12 | return Status::OK(); |
360 | 12 | } |
361 | | |
362 | 74 | const UInt8* begin = ldata.data(); |
363 | 74 | const UInt8* end = begin + ldata.size(); |
364 | 74 | const UInt8* pos = begin; |
365 | | |
366 | | /// Current index in the array of strings. |
367 | 74 | size_t i = 0; |
368 | 74 | std::fill(res.begin(), res.end(), 0); |
369 | | |
370 | 74 | StringRef rstr_ref(rdata.data, rdata.size); |
371 | 74 | StringSearch search(&rstr_ref); |
372 | | |
373 | 90 | while (pos < end) { |
374 | | // search return matched substring start offset |
375 | 64 | pos = (UInt8*)search.search((char*)pos, end - pos); |
376 | 64 | if (pos >= end) { |
377 | 48 | break; |
378 | 48 | } |
379 | | |
380 | | /// Determine which index it refers to. |
381 | | /// begin + value_offsets[i] is the start offset of string at i+1 |
382 | 16 | while (begin + loffsets[i] < pos) { |
383 | 0 | ++i; |
384 | 0 | } |
385 | | |
386 | | /// We check that the entry does not pass through the boundaries of strings. |
387 | 16 | if (pos + rdata.size <= begin + loffsets[i]) { |
388 | 16 | int loc = (int)(pos - begin) - loffsets[i - 1]; |
389 | 16 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
390 | 16 | auto len = std::min(l_str_size, loc); |
391 | 16 | loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len); |
392 | 16 | res[i] = loc + 1; |
393 | 16 | } |
394 | | |
395 | | // move to next string offset |
396 | 16 | pos = begin + loffsets[i]; |
397 | 16 | ++i; |
398 | 16 | } |
399 | | |
400 | 74 | return Status::OK(); |
401 | 86 | } |
402 | | |
403 | | static Status vector_vector(const ColumnString::Chars& ldata, |
404 | | const ColumnString::Offsets& loffsets, |
405 | | const ColumnString::Chars& rdata, |
406 | 207 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { |
407 | 207 | DCHECK_EQ(loffsets.size(), roffsets.size()); |
408 | | |
409 | 207 | auto size = loffsets.size(); |
410 | 207 | res.resize(size); |
411 | 661 | for (int i = 0; i < size; ++i) { |
412 | 454 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
413 | 454 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
414 | 454 | StringRef lstr_ref(l_raw_str, l_str_size); |
415 | | |
416 | 454 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
417 | 454 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
418 | 454 | StringRef rstr_ref(r_raw_str, r_str_size); |
419 | | |
420 | 454 | res[i] = execute(lstr_ref, rstr_ref); |
421 | 454 | } |
422 | | |
423 | 207 | return Status::OK(); |
424 | 207 | } |
425 | | |
426 | 526 | static int execute(const StringRef& strl, const StringRef& strr) { |
427 | 526 | if (strr.size == 0) { |
428 | 71 | return 1; |
429 | 71 | } |
430 | | |
431 | 455 | StringSearch search(&strr); |
432 | | // Hive returns positions starting from 1. |
433 | 455 | int loc = search.search(&strl); |
434 | 455 | if (loc > 0) { |
435 | 43 | int len = std::min(loc, (int)strl.size); |
436 | 43 | loc = simd::VStringFunctions::get_char_len(strl.data, len); |
437 | 43 | } |
438 | | |
439 | 455 | return loc + 1; |
440 | 526 | } |
441 | | }; |
442 | | |
443 | | // the same impl as instr |
444 | | struct NameLocate { |
445 | | static constexpr auto name = "locate"; |
446 | | }; |
447 | | |
448 | | // LeftDataType and RightDataType are DataTypeString |
449 | | template <typename LeftDataType, typename RightDataType> |
450 | | struct StringLocateImpl { |
451 | | using ResultDataType = DataTypeInt32; |
452 | | using ResultPaddedPODArray = PaddedPODArray<Int32>; |
453 | | |
454 | | static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata, |
455 | 38 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { |
456 | 38 | return StringInStrImpl<LeftDataType, RightDataType>::vector_scalar(rdata, roffsets, ldata, |
457 | 38 | res); |
458 | 38 | } |
459 | | |
460 | | static Status vector_scalar(const ColumnString::Chars& ldata, |
461 | | const ColumnString::Offsets& loffsets, const StringRef& rdata, |
462 | 36 | ResultPaddedPODArray& res) { |
463 | 36 | return StringInStrImpl<LeftDataType, RightDataType>::scalar_vector(rdata, ldata, loffsets, |
464 | 36 | res); |
465 | 36 | } |
466 | | |
467 | | static Status vector_vector(const ColumnString::Chars& ldata, |
468 | | const ColumnString::Offsets& loffsets, |
469 | | const ColumnString::Chars& rdata, |
470 | 126 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { |
471 | 126 | return StringInStrImpl<LeftDataType, RightDataType>::vector_vector(rdata, roffsets, ldata, |
472 | 126 | loffsets, res); |
473 | 126 | } |
474 | | }; |
475 | | |
476 | | // LeftDataType and RightDataType are DataTypeString |
477 | | template <typename LeftDataType, typename RightDataType, typename OP> |
478 | | struct StringFunctionImpl { |
479 | | using ResultDataType = typename OP::ResultDataType; |
480 | | using ResultPaddedPODArray = typename OP::ResultPaddedPODArray; |
481 | | |
482 | | static Status vector_vector(const ColumnString::Chars& ldata, |
483 | | const ColumnString::Offsets& loffsets, |
484 | | const ColumnString::Chars& rdata, |
485 | 213 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { |
486 | 213 | DCHECK_EQ(loffsets.size(), roffsets.size()); |
487 | | |
488 | 213 | auto size = loffsets.size(); |
489 | 213 | res.resize(size); |
490 | 576 | for (int i = 0; i < size; ++i) { |
491 | 363 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
492 | 363 | int l_str_size = loffsets[i] - loffsets[i - 1]; |
493 | | |
494 | 363 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
495 | 363 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
496 | | |
497 | 363 | std::string_view lview(l_raw_str, l_str_size); |
498 | 363 | std::string_view rview(r_raw_str, r_str_size); |
499 | | |
500 | 363 | OP::execute(lview, rview, res[i]); |
501 | 363 | } |
502 | 213 | return Status::OK(); |
503 | 213 | } _ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_ Line | Count | Source | 485 | 88 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { | 486 | 88 | DCHECK_EQ(loffsets.size(), roffsets.size()); | 487 | | | 488 | 88 | auto size = loffsets.size(); | 489 | 88 | res.resize(size); | 490 | 215 | for (int i = 0; i < size; ++i) { | 491 | 127 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); | 492 | 127 | int l_str_size = loffsets[i] - loffsets[i - 1]; | 493 | | | 494 | 127 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); | 495 | 127 | int r_str_size = roffsets[i] - roffsets[i - 1]; | 496 | | | 497 | 127 | std::string_view lview(l_raw_str, l_str_size); | 498 | 127 | std::string_view rview(r_raw_str, r_str_size); | 499 | | | 500 | 127 | OP::execute(lview, rview, res[i]); | 501 | 127 | } | 502 | 88 | return Status::OK(); | 503 | 88 | } |
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_ Line | Count | Source | 485 | 61 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { | 486 | 61 | DCHECK_EQ(loffsets.size(), roffsets.size()); | 487 | | | 488 | 61 | auto size = loffsets.size(); | 489 | 61 | res.resize(size); | 490 | 175 | for (int i = 0; i < size; ++i) { | 491 | 114 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); | 492 | 114 | int l_str_size = loffsets[i] - loffsets[i - 1]; | 493 | | | 494 | 114 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); | 495 | 114 | int r_str_size = roffsets[i] - roffsets[i - 1]; | 496 | | | 497 | 114 | std::string_view lview(l_raw_str, l_str_size); | 498 | 114 | std::string_view rview(r_raw_str, r_str_size); | 499 | | | 500 | 114 | OP::execute(lview, rview, res[i]); | 501 | 114 | } | 502 | 61 | return Status::OK(); | 503 | 61 | } |
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RNS4_IiLm4096ES7_Lm16ELm15EEE Line | Count | Source | 485 | 64 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { | 486 | 64 | DCHECK_EQ(loffsets.size(), roffsets.size()); | 487 | | | 488 | 64 | auto size = loffsets.size(); | 489 | 64 | res.resize(size); | 490 | 186 | for (int i = 0; i < size; ++i) { | 491 | 122 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); | 492 | 122 | int l_str_size = loffsets[i] - loffsets[i - 1]; | 493 | | | 494 | 122 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); | 495 | 122 | int r_str_size = roffsets[i] - roffsets[i - 1]; | 496 | | | 497 | 122 | std::string_view lview(l_raw_str, l_str_size); | 498 | 122 | std::string_view rview(r_raw_str, r_str_size); | 499 | | | 500 | 122 | OP::execute(lview, rview, res[i]); | 501 | 122 | } | 502 | 64 | return Status::OK(); | 503 | 64 | } |
|
504 | | static Status vector_scalar(const ColumnString::Chars& ldata, |
505 | | const ColumnString::Offsets& loffsets, const StringRef& rdata, |
506 | 46 | ResultPaddedPODArray& res) { |
507 | 46 | auto size = loffsets.size(); |
508 | 46 | res.resize(size); |
509 | 46 | std::string_view rview(rdata.data, rdata.size); |
510 | 11.3k | for (int i = 0; i < size; ++i) { |
511 | 11.2k | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); |
512 | 11.2k | int l_str_size = loffsets[i] - loffsets[i - 1]; |
513 | 11.2k | std::string_view lview(l_raw_str, l_str_size); |
514 | | |
515 | 11.2k | OP::execute(lview, rview, res[i]); |
516 | 11.2k | } |
517 | 46 | return Status::OK(); |
518 | 46 | } _ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_ Line | Count | Source | 506 | 16 | ResultPaddedPODArray& res) { | 507 | 16 | auto size = loffsets.size(); | 508 | 16 | res.resize(size); | 509 | 16 | std::string_view rview(rdata.data, rdata.size); | 510 | 11.2k | for (int i = 0; i < size; ++i) { | 511 | 11.2k | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); | 512 | 11.2k | int l_str_size = loffsets[i] - loffsets[i - 1]; | 513 | 11.2k | std::string_view lview(l_raw_str, l_str_size); | 514 | | | 515 | 11.2k | OP::execute(lview, rview, res[i]); | 516 | 11.2k | } | 517 | 16 | return Status::OK(); | 518 | 16 | } |
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_ Line | Count | Source | 506 | 14 | ResultPaddedPODArray& res) { | 507 | 14 | auto size = loffsets.size(); | 508 | 14 | res.resize(size); | 509 | 14 | std::string_view rview(rdata.data, rdata.size); | 510 | 28 | for (int i = 0; i < size; ++i) { | 511 | 14 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); | 512 | 14 | int l_str_size = loffsets[i] - loffsets[i - 1]; | 513 | 14 | std::string_view lview(l_raw_str, l_str_size); | 514 | | | 515 | 14 | OP::execute(lview, rview, res[i]); | 516 | 14 | } | 517 | 14 | return Status::OK(); | 518 | 14 | } |
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERNS4_IiLm4096ES7_Lm16ELm15EEE Line | Count | Source | 506 | 16 | ResultPaddedPODArray& res) { | 507 | 16 | auto size = loffsets.size(); | 508 | 16 | res.resize(size); | 509 | 16 | std::string_view rview(rdata.data, rdata.size); | 510 | 32 | for (int i = 0; i < size; ++i) { | 511 | 16 | const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]); | 512 | 16 | int l_str_size = loffsets[i] - loffsets[i - 1]; | 513 | 16 | std::string_view lview(l_raw_str, l_str_size); | 514 | | | 515 | 16 | OP::execute(lview, rview, res[i]); | 516 | 16 | } | 517 | 16 | return Status::OK(); | 518 | 16 | } |
|
519 | | static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata, |
520 | 44 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { |
521 | 44 | auto size = roffsets.size(); |
522 | 44 | res.resize(size); |
523 | 44 | std::string_view lview(ldata.data, ldata.size); |
524 | 94 | for (int i = 0; i < size; ++i) { |
525 | 50 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); |
526 | 50 | int r_str_size = roffsets[i] - roffsets[i - 1]; |
527 | 50 | std::string_view rview(r_raw_str, r_str_size); |
528 | | |
529 | 50 | OP::execute(lview, rview, res[i]); |
530 | 50 | } |
531 | 44 | return Status::OK(); |
532 | 44 | } _ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_ Line | Count | Source | 520 | 4 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { | 521 | 4 | auto size = roffsets.size(); | 522 | 4 | res.resize(size); | 523 | 4 | std::string_view lview(ldata.data, ldata.size); | 524 | 8 | for (int i = 0; i < size; ++i) { | 525 | 4 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); | 526 | 4 | int r_str_size = roffsets[i] - roffsets[i - 1]; | 527 | 4 | std::string_view rview(r_raw_str, r_str_size); | 528 | | | 529 | 4 | OP::execute(lview, rview, res[i]); | 530 | 4 | } | 531 | 4 | return Status::OK(); | 532 | 4 | } |
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_ Line | Count | Source | 520 | 14 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { | 521 | 14 | auto size = roffsets.size(); | 522 | 14 | res.resize(size); | 523 | 14 | std::string_view lview(ldata.data, ldata.size); | 524 | 28 | for (int i = 0; i < size; ++i) { | 525 | 14 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); | 526 | 14 | int r_str_size = roffsets[i] - roffsets[i - 1]; | 527 | 14 | std::string_view rview(r_raw_str, r_str_size); | 528 | | | 529 | 14 | OP::execute(lview, rview, res[i]); | 530 | 14 | } | 531 | 14 | return Status::OK(); | 532 | 14 | } |
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERNS7_IiLm4096ESA_Lm16ELm15EEE Line | Count | Source | 520 | 26 | const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) { | 521 | 26 | auto size = roffsets.size(); | 522 | 26 | res.resize(size); | 523 | 26 | std::string_view lview(ldata.data, ldata.size); | 524 | 58 | for (int i = 0; i < size; ++i) { | 525 | 32 | const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]); | 526 | 32 | int r_str_size = roffsets[i] - roffsets[i - 1]; | 527 | 32 | std::string_view rview(r_raw_str, r_str_size); | 528 | | | 529 | 32 | OP::execute(lview, rview, res[i]); | 530 | 32 | } | 531 | 26 | return Status::OK(); | 532 | 26 | } |
|
533 | | }; |
534 | | |
/// Function-name tag for the SQL function `lower`.
struct NameToLower {
    static constexpr const char* name = "lower";
};

/// Function-name tag for the SQL function `upper`.
struct NameToUpper {
    static constexpr const char* name = "upper";
};
542 | | |
543 | | template <typename OpName> |
544 | | struct TransferImpl { |
545 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
546 | 323 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { |
547 | 323 | size_t offset_size = offsets.size(); |
548 | 323 | if (UNLIKELY(!offset_size)) { |
549 | 0 | return Status::OK(); |
550 | 0 | } |
551 | | |
552 | 323 | const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()}); |
553 | 323 | res_offsets.resize(offset_size); |
554 | 323 | if (is_ascii) { |
555 | 263 | memcpy_small_allow_read_write_overflow15( |
556 | 263 | res_offsets.data(), offsets.data(), |
557 | 263 | offset_size * sizeof(ColumnString::Offsets::value_type)); |
558 | | |
559 | 263 | size_t data_length = data.size(); |
560 | 263 | res_data.resize(data_length); |
561 | 263 | if constexpr (std::is_same_v<OpName, NameToUpper>) { |
562 | 92 | simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data()); |
563 | 171 | } else if constexpr (std::is_same_v<OpName, NameToLower>) { |
564 | 171 | simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data()); |
565 | 171 | } |
566 | 263 | } else { |
567 | 60 | execute_utf8(data, offsets, res_data, res_offsets); |
568 | 60 | } |
569 | | |
570 | 323 | return Status::OK(); |
571 | 323 | } _ZN5doris12TransferImplINS_11NameToLowerEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_ Line | Count | Source | 546 | 192 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 547 | 192 | size_t offset_size = offsets.size(); | 548 | 192 | if (UNLIKELY(!offset_size)) { | 549 | 0 | return Status::OK(); | 550 | 0 | } | 551 | | | 552 | 192 | const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()}); | 553 | 192 | res_offsets.resize(offset_size); | 554 | 192 | if (is_ascii) { | 555 | 171 | memcpy_small_allow_read_write_overflow15( | 556 | 171 | res_offsets.data(), offsets.data(), | 557 | 171 | offset_size * sizeof(ColumnString::Offsets::value_type)); | 558 | | | 559 | 171 | size_t data_length = data.size(); | 560 | 171 | res_data.resize(data_length); | 561 | | if constexpr (std::is_same_v<OpName, NameToUpper>) { | 562 | | simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data()); | 563 | 171 | } else if constexpr (std::is_same_v<OpName, NameToLower>) { | 564 | 171 | simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data()); | 565 | 171 | } | 566 | 171 | } else { | 567 | 21 | execute_utf8(data, offsets, res_data, res_offsets); | 568 | 21 | } | 569 | | | 570 | 192 | return Status::OK(); | 571 | 192 | } |
_ZN5doris12TransferImplINS_11NameToUpperEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_ Line | Count | Source | 546 | 131 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 547 | 131 | size_t offset_size = offsets.size(); | 548 | 131 | if (UNLIKELY(!offset_size)) { | 549 | 0 | return Status::OK(); | 550 | 0 | } | 551 | | | 552 | 131 | const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()}); | 553 | 131 | res_offsets.resize(offset_size); | 554 | 131 | if (is_ascii) { | 555 | 92 | memcpy_small_allow_read_write_overflow15( | 556 | 92 | res_offsets.data(), offsets.data(), | 557 | 92 | offset_size * sizeof(ColumnString::Offsets::value_type)); | 558 | | | 559 | 92 | size_t data_length = data.size(); | 560 | 92 | res_data.resize(data_length); | 561 | 92 | if constexpr (std::is_same_v<OpName, NameToUpper>) { | 562 | 92 | simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data()); | 563 | | } else if constexpr (std::is_same_v<OpName, NameToLower>) { | 564 | | simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data()); | 565 | | } | 566 | 92 | } else { | 567 | 39 | execute_utf8(data, offsets, res_data, res_offsets); | 568 | 39 | } | 569 | | | 570 | 131 | return Status::OK(); | 571 | 131 | } |
|
572 | | |
573 | | static void execute_utf8(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
574 | 60 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { |
575 | 60 | std::string result; |
576 | 198 | for (int64_t i = 0; i < offsets.size(); ++i) { |
577 | 138 | const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
578 | 138 | uint32_t size = offsets[i] - offsets[i - 1]; |
579 | | |
580 | 138 | result.clear(); |
581 | 138 | if constexpr (std::is_same_v<OpName, NameToUpper>) { |
582 | 91 | to_upper_utf8(begin, size, result); |
583 | 91 | } else if constexpr (std::is_same_v<OpName, NameToLower>) { |
584 | 47 | to_lower_utf8(begin, size, result); |
585 | 47 | } |
586 | 138 | StringOP::push_value_string(result, i, res_data, res_offsets); |
587 | 138 | } |
588 | 60 | } _ZN5doris12TransferImplINS_11NameToLowerEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_ Line | Count | Source | 574 | 21 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 575 | 21 | std::string result; | 576 | 68 | for (int64_t i = 0; i < offsets.size(); ++i) { | 577 | 47 | const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 578 | 47 | uint32_t size = offsets[i] - offsets[i - 1]; | 579 | | | 580 | 47 | result.clear(); | 581 | | if constexpr (std::is_same_v<OpName, NameToUpper>) { | 582 | | to_upper_utf8(begin, size, result); | 583 | 47 | } else if constexpr (std::is_same_v<OpName, NameToLower>) { | 584 | 47 | to_lower_utf8(begin, size, result); | 585 | 47 | } | 586 | 47 | StringOP::push_value_string(result, i, res_data, res_offsets); | 587 | 47 | } | 588 | 21 | } |
_ZN5doris12TransferImplINS_11NameToUpperEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_ Line | Count | Source | 574 | 39 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 575 | 39 | std::string result; | 576 | 130 | for (int64_t i = 0; i < offsets.size(); ++i) { | 577 | 91 | const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]); | 578 | 91 | uint32_t size = offsets[i] - offsets[i - 1]; | 579 | | | 580 | 91 | result.clear(); | 581 | 91 | if constexpr (std::is_same_v<OpName, NameToUpper>) { | 582 | 91 | to_upper_utf8(begin, size, result); | 583 | | } else if constexpr (std::is_same_v<OpName, NameToLower>) { | 584 | | to_lower_utf8(begin, size, result); | 585 | | } | 586 | 91 | StringOP::push_value_string(result, i, res_data, res_offsets); | 587 | 91 | } | 588 | 39 | } |
|
589 | | |
590 | 91 | static void to_upper_utf8(const char* data, uint32_t size, std::string& result) { |
591 | 91 | icu::StringPiece sp; |
592 | 91 | sp.set(data, size); |
593 | 91 | icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp); |
594 | 91 | unicode_str.toUpper(); |
595 | 91 | unicode_str.toUTF8String(result); |
596 | 91 | } |
597 | | |
598 | 47 | static void to_lower_utf8(const char* data, uint32_t size, std::string& result) { |
599 | 47 | icu::StringPiece sp; |
600 | 47 | sp.set(data, size); |
601 | 47 | icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp); |
602 | 47 | unicode_str.toLower(); |
603 | 47 | unicode_str.toUTF8String(result); |
604 | 47 | } |
605 | | }; |
606 | | |
607 | | // Capitalize first letter |
608 | | struct NameToInitcap { |
609 | | static constexpr auto name = "initcap"; |
610 | | }; |
611 | | |
612 | | struct InitcapImpl { |
613 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
614 | 172 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { |
615 | 172 | res_offsets.resize(offsets.size()); |
616 | | |
617 | 172 | const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()}); |
618 | 172 | if (is_ascii) { |
619 | 114 | impl_vectors_ascii(data, offsets, res_data, res_offsets); |
620 | 114 | } else { |
621 | 58 | impl_vectors_utf8(data, offsets, res_data, res_offsets); |
622 | 58 | } |
623 | 172 | return Status::OK(); |
624 | 172 | } |
625 | | |
626 | | static void impl_vectors_ascii(const ColumnString::Chars& data, |
627 | | const ColumnString::Offsets& offsets, |
628 | | ColumnString::Chars& res_data, |
629 | 114 | ColumnString::Offsets& res_offsets) { |
630 | 114 | size_t offset_size = offsets.size(); |
631 | 114 | memcpy_small_allow_read_write_overflow15( |
632 | 114 | res_offsets.data(), offsets.data(), |
633 | 114 | offset_size * sizeof(ColumnString::Offsets::value_type)); |
634 | | |
635 | 114 | size_t data_length = data.size(); |
636 | 114 | res_data.resize(data_length); |
637 | 114 | simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data()); |
638 | | |
639 | 114 | bool need_capitalize = true; |
640 | 246 | for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) { |
641 | 132 | auto end_index = res_offsets[offset_index]; |
642 | 132 | need_capitalize = true; |
643 | | |
644 | 1.56k | for (size_t i = start_index; i < end_index; ++i) { |
645 | 1.43k | if (!::isalnum(res_data[i])) { |
646 | 216 | need_capitalize = true; |
647 | 1.21k | } else if (need_capitalize) { |
648 | | /* |
649 | | https://en.cppreference.com/w/cpp/string/byte/toupper |
650 | | Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. |
651 | | To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char: |
652 | | char my_toupper(char ch) |
653 | | { |
654 | | return static_cast<char>(std::toupper(static_cast<unsigned char>(ch))); |
655 | | } |
656 | | */ |
657 | 267 | res_data[i] = static_cast<unsigned char>(::toupper(res_data[i])); |
658 | 267 | need_capitalize = false; |
659 | 267 | } |
660 | 1.43k | } |
661 | | |
662 | 132 | start_index = end_index; |
663 | 132 | } |
664 | 114 | } |
665 | | |
666 | | static void impl_vectors_utf8(const ColumnString::Chars& data, |
667 | | const ColumnString::Offsets& offsets, |
668 | | ColumnString::Chars& res_data, |
669 | 58 | ColumnString::Offsets& res_offsets) { |
670 | 58 | std::string result; |
671 | 123 | for (int64_t i = 0; i < offsets.size(); ++i) { |
672 | 65 | const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
673 | 65 | uint32_t size = offsets[i] - offsets[i - 1]; |
674 | 65 | result.clear(); |
675 | 65 | to_initcap_utf8(begin, size, result); |
676 | 65 | StringOP::push_value_string(result, i, res_data, res_offsets); |
677 | 65 | } |
678 | 58 | } |
679 | | |
680 | 65 | static void to_initcap_utf8(const char* data, uint32_t size, std::string& result) { |
681 | 65 | icu::StringPiece sp; |
682 | 65 | sp.set(data, size); |
683 | 65 | icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp); |
684 | 65 | unicode_str.toLower(); |
685 | 65 | icu::UnicodeString output_str; |
686 | 65 | bool need_capitalize = true; |
687 | 65 | icu::StringCharacterIterator iter(unicode_str); |
688 | 647 | for (UChar32 ch = iter.first32(); ch != icu::CharacterIterator::DONE; ch = iter.next32()) { |
689 | 582 | if (!u_isalnum(ch)) { |
690 | 105 | need_capitalize = true; |
691 | 477 | } else if (need_capitalize) { |
692 | 87 | ch = u_toupper(ch); |
693 | 87 | need_capitalize = false; |
694 | 87 | } |
695 | 582 | output_str.append(ch); |
696 | 582 | } |
697 | 65 | output_str.toUTF8String(result); |
698 | 65 | } |
699 | | }; |
700 | | |
701 | | struct NameTrim { |
702 | | static constexpr auto name = "trim"; |
703 | | }; |
704 | | struct NameLTrim { |
705 | | static constexpr auto name = "ltrim"; |
706 | | }; |
707 | | struct NameRTrim { |
708 | | static constexpr auto name = "rtrim"; |
709 | | }; |
710 | | struct NameTrimIn { |
711 | | static constexpr auto name = "trim_in"; |
712 | | }; |
713 | | struct NameLTrimIn { |
714 | | static constexpr auto name = "ltrim_in"; |
715 | | }; |
716 | | struct NameRTrimIn { |
717 | | static constexpr auto name = "rtrim_in"; |
718 | | }; |
719 | | template <bool is_ltrim, bool is_rtrim, bool trim_single> |
720 | | struct TrimUtil { |
721 | | static Status vector(const ColumnString::Chars& str_data, |
722 | | const ColumnString::Offsets& str_offsets, const StringRef& remove_str, |
723 | 300 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { |
724 | 300 | const size_t offset_size = str_offsets.size(); |
725 | 300 | res_offsets.resize(offset_size); |
726 | 300 | res_data.reserve(str_data.size()); |
727 | 852 | for (size_t i = 0; i < offset_size; ++i) { |
728 | 552 | const auto* str_begin = str_data.data() + str_offsets[i - 1]; |
729 | 552 | const auto* str_end = str_data.data() + str_offsets[i]; |
730 | | |
731 | 552 | if constexpr (is_ltrim) { |
732 | 335 | str_begin = |
733 | 335 | simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str); |
734 | 335 | } |
735 | 552 | if constexpr (is_rtrim) { |
736 | 395 | str_end = |
737 | 395 | simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str); |
738 | 395 | } |
739 | | |
740 | 552 | res_data.insert_assume_reserved(str_begin, str_end); |
741 | | // The length of the result of the trim function will never exceed the length of the input. |
742 | 552 | res_offsets[i] = (ColumnString::Offset)res_data.size(); |
743 | 552 | } |
744 | 300 | return Status::OK(); |
745 | 300 | } _ZN5doris8TrimUtilILb1ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 723 | 58 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 724 | 58 | const size_t offset_size = str_offsets.size(); | 725 | 58 | res_offsets.resize(offset_size); | 726 | 58 | res_data.reserve(str_data.size()); | 727 | 178 | for (size_t i = 0; i < offset_size; ++i) { | 728 | 120 | const auto* str_begin = str_data.data() + str_offsets[i - 1]; | 729 | 120 | const auto* str_end = str_data.data() + str_offsets[i]; | 730 | | | 731 | 120 | if constexpr (is_ltrim) { | 732 | 120 | str_begin = | 733 | 120 | simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str); | 734 | 120 | } | 735 | 120 | if constexpr (is_rtrim) { | 736 | 120 | str_end = | 737 | 120 | simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str); | 738 | 120 | } | 739 | | | 740 | 120 | res_data.insert_assume_reserved(str_begin, str_end); | 741 | | // The length of the result of the trim function will never exceed the length of the input. | 742 | 120 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 743 | 120 | } | 744 | 58 | return Status::OK(); | 745 | 58 | } |
_ZN5doris8TrimUtilILb1ELb0ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 723 | 52 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 724 | 52 | const size_t offset_size = str_offsets.size(); | 725 | 52 | res_offsets.resize(offset_size); | 726 | 52 | res_data.reserve(str_data.size()); | 727 | 148 | for (size_t i = 0; i < offset_size; ++i) { | 728 | 96 | const auto* str_begin = str_data.data() + str_offsets[i - 1]; | 729 | 96 | const auto* str_end = str_data.data() + str_offsets[i]; | 730 | | | 731 | 96 | if constexpr (is_ltrim) { | 732 | 96 | str_begin = | 733 | 96 | simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str); | 734 | 96 | } | 735 | | if constexpr (is_rtrim) { | 736 | | str_end = | 737 | | simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str); | 738 | | } | 739 | | | 740 | 96 | res_data.insert_assume_reserved(str_begin, str_end); | 741 | | // The length of the result of the trim function will never exceed the length of the input. | 742 | 96 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 743 | 96 | } | 744 | 52 | return Status::OK(); | 745 | 52 | } |
_ZN5doris8TrimUtilILb0ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 723 | 94 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 724 | 94 | const size_t offset_size = str_offsets.size(); | 725 | 94 | res_offsets.resize(offset_size); | 726 | 94 | res_data.reserve(str_data.size()); | 727 | 266 | for (size_t i = 0; i < offset_size; ++i) { | 728 | 172 | const auto* str_begin = str_data.data() + str_offsets[i - 1]; | 729 | 172 | const auto* str_end = str_data.data() + str_offsets[i]; | 730 | | | 731 | | if constexpr (is_ltrim) { | 732 | | str_begin = | 733 | | simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str); | 734 | | } | 735 | 172 | if constexpr (is_rtrim) { | 736 | 172 | str_end = | 737 | 172 | simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str); | 738 | 172 | } | 739 | | | 740 | 172 | res_data.insert_assume_reserved(str_begin, str_end); | 741 | | // The length of the result of the trim function will never exceed the length of the input. | 742 | 172 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 743 | 172 | } | 744 | 94 | return Status::OK(); | 745 | 94 | } |
_ZN5doris8TrimUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 723 | 24 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 724 | 24 | const size_t offset_size = str_offsets.size(); | 725 | 24 | res_offsets.resize(offset_size); | 726 | 24 | res_data.reserve(str_data.size()); | 727 | 82 | for (size_t i = 0; i < offset_size; ++i) { | 728 | 58 | const auto* str_begin = str_data.data() + str_offsets[i - 1]; | 729 | 58 | const auto* str_end = str_data.data() + str_offsets[i]; | 730 | | | 731 | 58 | if constexpr (is_ltrim) { | 732 | 58 | str_begin = | 733 | 58 | simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str); | 734 | 58 | } | 735 | 58 | if constexpr (is_rtrim) { | 736 | 58 | str_end = | 737 | 58 | simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str); | 738 | 58 | } | 739 | | | 740 | 58 | res_data.insert_assume_reserved(str_begin, str_end); | 741 | | // The length of the result of the trim function will never exceed the length of the input. | 742 | 58 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 743 | 58 | } | 744 | 24 | return Status::OK(); | 745 | 24 | } |
_ZN5doris8TrimUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 723 | 27 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 724 | 27 | const size_t offset_size = str_offsets.size(); | 725 | 27 | res_offsets.resize(offset_size); | 726 | 27 | res_data.reserve(str_data.size()); | 727 | 88 | for (size_t i = 0; i < offset_size; ++i) { | 728 | 61 | const auto* str_begin = str_data.data() + str_offsets[i - 1]; | 729 | 61 | const auto* str_end = str_data.data() + str_offsets[i]; | 730 | | | 731 | 61 | if constexpr (is_ltrim) { | 732 | 61 | str_begin = | 733 | 61 | simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str); | 734 | 61 | } | 735 | | if constexpr (is_rtrim) { | 736 | | str_end = | 737 | | simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str); | 738 | | } | 739 | | | 740 | 61 | res_data.insert_assume_reserved(str_begin, str_end); | 741 | | // The length of the result of the trim function will never exceed the length of the input. | 742 | 61 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 743 | 61 | } | 744 | 27 | return Status::OK(); | 745 | 27 | } |
_ZN5doris8TrimUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 723 | 45 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 724 | 45 | const size_t offset_size = str_offsets.size(); | 725 | 45 | res_offsets.resize(offset_size); | 726 | 45 | res_data.reserve(str_data.size()); | 727 | 90 | for (size_t i = 0; i < offset_size; ++i) { | 728 | 45 | const auto* str_begin = str_data.data() + str_offsets[i - 1]; | 729 | 45 | const auto* str_end = str_data.data() + str_offsets[i]; | 730 | | | 731 | | if constexpr (is_ltrim) { | 732 | | str_begin = | 733 | | simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str); | 734 | | } | 735 | 45 | if constexpr (is_rtrim) { | 736 | 45 | str_end = | 737 | 45 | simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str); | 738 | 45 | } | 739 | | | 740 | 45 | res_data.insert_assume_reserved(str_begin, str_end); | 741 | | // The length of the result of the trim function will never exceed the length of the input. | 742 | 45 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 743 | 45 | } | 744 | 45 | return Status::OK(); | 745 | 45 | } |
|
746 | | }; |
747 | | template <bool is_ltrim, bool is_rtrim, bool trim_single> |
748 | | struct TrimInUtil { |
749 | | static Status vector(const ColumnString::Chars& str_data, |
750 | | const ColumnString::Offsets& str_offsets, const StringRef& remove_str, |
751 | 121 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { |
752 | 121 | const size_t offset_size = str_offsets.size(); |
753 | 121 | res_offsets.resize(offset_size); |
754 | 121 | res_data.reserve(str_data.size()); |
755 | 121 | bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) && |
756 | 121 | simd::VStringFunctions::is_ascii(StringRef( |
757 | 76 | reinterpret_cast<const char*>(str_data.data()), str_data.size())); |
758 | | |
759 | 121 | if (all_ascii) { |
760 | 68 | return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets); |
761 | 68 | } else { |
762 | 53 | return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets); |
763 | 53 | } |
764 | 121 | } _ZN5doris10TrimInUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 751 | 43 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 752 | 43 | const size_t offset_size = str_offsets.size(); | 753 | 43 | res_offsets.resize(offset_size); | 754 | 43 | res_data.reserve(str_data.size()); | 755 | 43 | bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) && | 756 | 43 | simd::VStringFunctions::is_ascii(StringRef( | 757 | 28 | reinterpret_cast<const char*>(str_data.data()), str_data.size())); | 758 | | | 759 | 43 | if (all_ascii) { | 760 | 24 | return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets); | 761 | 24 | } else { | 762 | 19 | return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets); | 763 | 19 | } | 764 | 43 | } |
_ZN5doris10TrimInUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 751 | 36 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 752 | 36 | const size_t offset_size = str_offsets.size(); | 753 | 36 | res_offsets.resize(offset_size); | 754 | 36 | res_data.reserve(str_data.size()); | 755 | 36 | bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) && | 756 | 36 | simd::VStringFunctions::is_ascii(StringRef( | 757 | 21 | reinterpret_cast<const char*>(str_data.data()), str_data.size())); | 758 | | | 759 | 36 | if (all_ascii) { | 760 | 19 | return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets); | 761 | 19 | } else { | 762 | 17 | return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets); | 763 | 17 | } | 764 | 36 | } |
_ZN5doris10TrimInUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 751 | 42 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { | 752 | 42 | const size_t offset_size = str_offsets.size(); | 753 | 42 | res_offsets.resize(offset_size); | 754 | 42 | res_data.reserve(str_data.size()); | 755 | 42 | bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) && | 756 | 42 | simd::VStringFunctions::is_ascii(StringRef( | 757 | 27 | reinterpret_cast<const char*>(str_data.data()), str_data.size())); | 758 | | | 759 | 42 | if (all_ascii) { | 760 | 25 | return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets); | 761 | 25 | } else { | 762 | 17 | return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets); | 763 | 17 | } | 764 | 42 | } |
|
765 | | |
766 | | private: |
767 | | static Status impl_vectors_ascii(const ColumnString::Chars& str_data, |
768 | | const ColumnString::Offsets& str_offsets, |
769 | | const StringRef& remove_str, ColumnString::Chars& res_data, |
770 | 68 | ColumnString::Offsets& res_offsets) { |
771 | 68 | const size_t offset_size = str_offsets.size(); |
772 | 68 | std::bitset<128> char_lookup; |
773 | 68 | const char* remove_begin = remove_str.data; |
774 | 68 | const char* remove_end = remove_str.data + remove_str.size; |
775 | | |
776 | 251 | while (remove_begin < remove_end) { |
777 | 183 | char_lookup.set(static_cast<unsigned char>(*remove_begin)); |
778 | 183 | remove_begin += 1; |
779 | 183 | } |
780 | | |
781 | 136 | for (size_t i = 0; i < offset_size; ++i) { |
782 | 68 | const char* str_begin = |
783 | 68 | reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]); |
784 | 68 | const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]); |
785 | 68 | const char* left_trim_pos = str_begin; |
786 | 68 | const char* right_trim_pos = str_end; |
787 | | |
788 | 68 | if constexpr (is_ltrim) { |
789 | 127 | while (left_trim_pos < str_end) { |
790 | 114 | if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) { |
791 | 30 | break; |
792 | 30 | } |
793 | 84 | ++left_trim_pos; |
794 | 84 | } |
795 | 43 | } |
796 | | |
797 | 68 | if constexpr (is_rtrim) { |
798 | 114 | while (right_trim_pos > left_trim_pos) { |
799 | 100 | --right_trim_pos; |
800 | 100 | if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) { |
801 | 35 | ++right_trim_pos; |
802 | 35 | break; |
803 | 35 | } |
804 | 100 | } |
805 | 49 | } |
806 | | |
807 | 68 | res_data.insert_assume_reserved(left_trim_pos, right_trim_pos); |
808 | | // The length of the result of the trim function will never exceed the length of the input. |
809 | 68 | res_offsets[i] = (ColumnString::Offset)res_data.size(); |
810 | 68 | } |
811 | | |
812 | 68 | return Status::OK(); |
813 | 68 | } _ZN5doris10TrimInUtilILb1ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 770 | 24 | ColumnString::Offsets& res_offsets) { | 771 | 24 | const size_t offset_size = str_offsets.size(); | 772 | 24 | std::bitset<128> char_lookup; | 773 | 24 | const char* remove_begin = remove_str.data; | 774 | 24 | const char* remove_end = remove_str.data + remove_str.size; | 775 | | | 776 | 86 | while (remove_begin < remove_end) { | 777 | 62 | char_lookup.set(static_cast<unsigned char>(*remove_begin)); | 778 | 62 | remove_begin += 1; | 779 | 62 | } | 780 | | | 781 | 48 | for (size_t i = 0; i < offset_size; ++i) { | 782 | 24 | const char* str_begin = | 783 | 24 | reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]); | 784 | 24 | const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]); | 785 | 24 | const char* left_trim_pos = str_begin; | 786 | 24 | const char* right_trim_pos = str_end; | 787 | | | 788 | 24 | if constexpr (is_ltrim) { | 789 | 57 | while (left_trim_pos < str_end) { | 790 | 50 | if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) { | 791 | 17 | break; | 792 | 17 | } | 793 | 33 | ++left_trim_pos; | 794 | 33 | } | 795 | 24 | } | 796 | | | 797 | 24 | if constexpr (is_rtrim) { | 798 | 39 | while (right_trim_pos > left_trim_pos) { | 799 | 32 | --right_trim_pos; | 800 | 32 | if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) { | 801 | 17 | ++right_trim_pos; | 802 | 17 | break; | 803 | 17 | } | 804 | 32 | } | 805 | 24 | } | 806 | | | 807 | 24 | res_data.insert_assume_reserved(left_trim_pos, right_trim_pos); | 808 | | // The length of the result of the trim function will never exceed the length of the input. 
| 809 | 24 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 810 | 24 | } | 811 | | | 812 | 24 | return Status::OK(); | 813 | 24 | } |
_ZN5doris10TrimInUtilILb1ELb0ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 770 | 19 | ColumnString::Offsets& res_offsets) { | 771 | 19 | const size_t offset_size = str_offsets.size(); | 772 | 19 | std::bitset<128> char_lookup; | 773 | 19 | const char* remove_begin = remove_str.data; | 774 | 19 | const char* remove_end = remove_str.data + remove_str.size; | 775 | | | 776 | 73 | while (remove_begin < remove_end) { | 777 | 54 | char_lookup.set(static_cast<unsigned char>(*remove_begin)); | 778 | 54 | remove_begin += 1; | 779 | 54 | } | 780 | | | 781 | 38 | for (size_t i = 0; i < offset_size; ++i) { | 782 | 19 | const char* str_begin = | 783 | 19 | reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]); | 784 | 19 | const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]); | 785 | 19 | const char* left_trim_pos = str_begin; | 786 | 19 | const char* right_trim_pos = str_end; | 787 | | | 788 | 19 | if constexpr (is_ltrim) { | 789 | 70 | while (left_trim_pos < str_end) { | 790 | 64 | if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) { | 791 | 13 | break; | 792 | 13 | } | 793 | 51 | ++left_trim_pos; | 794 | 51 | } | 795 | 19 | } | 796 | | | 797 | | if constexpr (is_rtrim) { | 798 | | while (right_trim_pos > left_trim_pos) { | 799 | | --right_trim_pos; | 800 | | if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) { | 801 | | ++right_trim_pos; | 802 | | break; | 803 | | } | 804 | | } | 805 | | } | 806 | | | 807 | 19 | res_data.insert_assume_reserved(left_trim_pos, right_trim_pos); | 808 | | // The length of the result of the trim function will never exceed the length of the input. | 809 | 19 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 810 | 19 | } | 811 | | | 812 | 19 | return Status::OK(); | 813 | 19 | } |
_ZN5doris10TrimInUtilILb0ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 770 | 25 | ColumnString::Offsets& res_offsets) { | 771 | 25 | const size_t offset_size = str_offsets.size(); | 772 | 25 | std::bitset<128> char_lookup; | 773 | 25 | const char* remove_begin = remove_str.data; | 774 | 25 | const char* remove_end = remove_str.data + remove_str.size; | 775 | | | 776 | 92 | while (remove_begin < remove_end) { | 777 | 67 | char_lookup.set(static_cast<unsigned char>(*remove_begin)); | 778 | 67 | remove_begin += 1; | 779 | 67 | } | 780 | | | 781 | 50 | for (size_t i = 0; i < offset_size; ++i) { | 782 | 25 | const char* str_begin = | 783 | 25 | reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]); | 784 | 25 | const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]); | 785 | 25 | const char* left_trim_pos = str_begin; | 786 | 25 | const char* right_trim_pos = str_end; | 787 | | | 788 | | if constexpr (is_ltrim) { | 789 | | while (left_trim_pos < str_end) { | 790 | | if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) { | 791 | | break; | 792 | | } | 793 | | ++left_trim_pos; | 794 | | } | 795 | | } | 796 | | | 797 | 25 | if constexpr (is_rtrim) { | 798 | 75 | while (right_trim_pos > left_trim_pos) { | 799 | 68 | --right_trim_pos; | 800 | 68 | if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) { | 801 | 18 | ++right_trim_pos; | 802 | 18 | break; | 803 | 18 | } | 804 | 68 | } | 805 | 25 | } | 806 | | | 807 | 25 | res_data.insert_assume_reserved(left_trim_pos, right_trim_pos); | 808 | | // The length of the result of the trim function will never exceed the length of the input. | 809 | 25 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 810 | 25 | } | 811 | | | 812 | 25 | return Status::OK(); | 813 | 25 | } |
|
814 | | |
815 | | static Status impl_vectors_utf8(const ColumnString::Chars& str_data, |
816 | | const ColumnString::Offsets& str_offsets, |
817 | | const StringRef& remove_str, ColumnString::Chars& res_data, |
818 | 53 | ColumnString::Offsets& res_offsets) { |
819 | 53 | const size_t offset_size = str_offsets.size(); |
820 | 53 | res_offsets.resize(offset_size); |
821 | 53 | res_data.reserve(str_data.size()); |
822 | | |
823 | 53 | std::unordered_set<std::string_view> char_lookup; |
824 | 53 | const char* remove_begin = remove_str.data; |
825 | 53 | const char* remove_end = remove_str.data + remove_str.size; |
826 | | |
827 | 240 | while (remove_begin < remove_end) { |
828 | 187 | size_t byte_len, char_len; |
829 | 187 | std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length( |
830 | 187 | remove_begin, remove_end, 1); |
831 | 187 | char_lookup.insert(std::string_view(remove_begin, byte_len)); |
832 | 187 | remove_begin += byte_len; |
833 | 187 | } |
834 | | |
835 | 140 | for (size_t i = 0; i < offset_size; ++i) { |
836 | 87 | const char* str_begin = |
837 | 87 | reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]); |
838 | 87 | const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]); |
839 | 87 | const char* left_trim_pos = str_begin; |
840 | 87 | const char* right_trim_pos = str_end; |
841 | | |
842 | 87 | if constexpr (is_ltrim) { |
843 | 81 | while (left_trim_pos < str_end) { |
844 | 73 | size_t byte_len, char_len; |
845 | 73 | std::tie(byte_len, char_len) = |
846 | 73 | simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos, |
847 | 73 | str_end, 1); |
848 | 73 | if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) == |
849 | 73 | char_lookup.end()) { |
850 | 52 | break; |
851 | 52 | } |
852 | 21 | left_trim_pos += byte_len; |
853 | 21 | } |
854 | 60 | } |
855 | | |
856 | 87 | if constexpr (is_rtrim) { |
857 | 88 | while (right_trim_pos > left_trim_pos) { |
858 | 80 | const char* prev_char_pos = right_trim_pos; |
859 | 156 | do { |
860 | 156 | --prev_char_pos; |
861 | 156 | } while ((*prev_char_pos & 0xC0) == 0x80); |
862 | 80 | size_t byte_len = right_trim_pos - prev_char_pos; |
863 | 80 | if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) == |
864 | 80 | char_lookup.end()) { |
865 | 52 | break; |
866 | 52 | } |
867 | 28 | right_trim_pos = prev_char_pos; |
868 | 28 | } |
869 | 60 | } |
870 | | |
871 | 87 | res_data.insert_assume_reserved(left_trim_pos, right_trim_pos); |
872 | | // The length of the result of the trim function will never exceed the length of the input. |
873 | 87 | res_offsets[i] = (ColumnString::Offset)res_data.size(); |
874 | 87 | } |
875 | 53 | return Status::OK(); |
876 | 53 | } _ZN5doris10TrimInUtilILb1ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 818 | 19 | ColumnString::Offsets& res_offsets) { | 819 | 19 | const size_t offset_size = str_offsets.size(); | 820 | 19 | res_offsets.resize(offset_size); | 821 | 19 | res_data.reserve(str_data.size()); | 822 | | | 823 | 19 | std::unordered_set<std::string_view> char_lookup; | 824 | 19 | const char* remove_begin = remove_str.data; | 825 | 19 | const char* remove_end = remove_str.data + remove_str.size; | 826 | | | 827 | 84 | while (remove_begin < remove_end) { | 828 | 65 | size_t byte_len, char_len; | 829 | 65 | std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length( | 830 | 65 | remove_begin, remove_end, 1); | 831 | 65 | char_lookup.insert(std::string_view(remove_begin, byte_len)); | 832 | 65 | remove_begin += byte_len; | 833 | 65 | } | 834 | | | 835 | 52 | for (size_t i = 0; i < offset_size; ++i) { | 836 | 33 | const char* str_begin = | 837 | 33 | reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]); | 838 | 33 | const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]); | 839 | 33 | const char* left_trim_pos = str_begin; | 840 | 33 | const char* right_trim_pos = str_end; | 841 | | | 842 | 33 | if constexpr (is_ltrim) { | 843 | 45 | while (left_trim_pos < str_end) { | 844 | 41 | size_t byte_len, char_len; | 845 | 41 | std::tie(byte_len, char_len) = | 846 | 41 | simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos, | 847 | 41 | str_end, 1); | 848 | 41 | if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) == | 849 | 41 | char_lookup.end()) { | 850 | 29 | break; | 851 | 29 | } | 852 | 12 | left_trim_pos += byte_len; | 853 | 12 | } | 854 | 33 | } | 855 | | | 856 | 33 | if constexpr (is_rtrim) { | 857 | 48 | while (right_trim_pos 
> left_trim_pos) { | 858 | 44 | const char* prev_char_pos = right_trim_pos; | 859 | 90 | do { | 860 | 90 | --prev_char_pos; | 861 | 90 | } while ((*prev_char_pos & 0xC0) == 0x80); | 862 | 44 | size_t byte_len = right_trim_pos - prev_char_pos; | 863 | 44 | if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) == | 864 | 44 | char_lookup.end()) { | 865 | 29 | break; | 866 | 29 | } | 867 | 15 | right_trim_pos = prev_char_pos; | 868 | 15 | } | 869 | 33 | } | 870 | | | 871 | 33 | res_data.insert_assume_reserved(left_trim_pos, right_trim_pos); | 872 | | // The length of the result of the trim function will never exceed the length of the input. | 873 | 33 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 874 | 33 | } | 875 | 19 | return Status::OK(); | 876 | 19 | } |
_ZN5doris10TrimInUtilILb1ELb0ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 818 | 17 | ColumnString::Offsets& res_offsets) { | 819 | 17 | const size_t offset_size = str_offsets.size(); | 820 | 17 | res_offsets.resize(offset_size); | 821 | 17 | res_data.reserve(str_data.size()); | 822 | | | 823 | 17 | std::unordered_set<std::string_view> char_lookup; | 824 | 17 | const char* remove_begin = remove_str.data; | 825 | 17 | const char* remove_end = remove_str.data + remove_str.size; | 826 | | | 827 | 78 | while (remove_begin < remove_end) { | 828 | 61 | size_t byte_len, char_len; | 829 | 61 | std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length( | 830 | 61 | remove_begin, remove_end, 1); | 831 | 61 | char_lookup.insert(std::string_view(remove_begin, byte_len)); | 832 | 61 | remove_begin += byte_len; | 833 | 61 | } | 834 | | | 835 | 44 | for (size_t i = 0; i < offset_size; ++i) { | 836 | 27 | const char* str_begin = | 837 | 27 | reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]); | 838 | 27 | const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]); | 839 | 27 | const char* left_trim_pos = str_begin; | 840 | 27 | const char* right_trim_pos = str_end; | 841 | | | 842 | 27 | if constexpr (is_ltrim) { | 843 | 36 | while (left_trim_pos < str_end) { | 844 | 32 | size_t byte_len, char_len; | 845 | 32 | std::tie(byte_len, char_len) = | 846 | 32 | simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos, | 847 | 32 | str_end, 1); | 848 | 32 | if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) == | 849 | 32 | char_lookup.end()) { | 850 | 23 | break; | 851 | 23 | } | 852 | 9 | left_trim_pos += byte_len; | 853 | 9 | } | 854 | 27 | } | 855 | | | 856 | | if constexpr (is_rtrim) { | 857 | | while (right_trim_pos > left_trim_pos) { | 
858 | | const char* prev_char_pos = right_trim_pos; | 859 | | do { | 860 | | --prev_char_pos; | 861 | | } while ((*prev_char_pos & 0xC0) == 0x80); | 862 | | size_t byte_len = right_trim_pos - prev_char_pos; | 863 | | if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) == | 864 | | char_lookup.end()) { | 865 | | break; | 866 | | } | 867 | | right_trim_pos = prev_char_pos; | 868 | | } | 869 | | } | 870 | | | 871 | 27 | res_data.insert_assume_reserved(left_trim_pos, right_trim_pos); | 872 | | // The length of the result of the trim function will never exceed the length of the input. | 873 | 27 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 874 | 27 | } | 875 | 17 | return Status::OK(); | 876 | 17 | } |
_ZN5doris10TrimInUtilILb0ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_ Line | Count | Source | 818 | 17 | ColumnString::Offsets& res_offsets) { | 819 | 17 | const size_t offset_size = str_offsets.size(); | 820 | 17 | res_offsets.resize(offset_size); | 821 | 17 | res_data.reserve(str_data.size()); | 822 | | | 823 | 17 | std::unordered_set<std::string_view> char_lookup; | 824 | 17 | const char* remove_begin = remove_str.data; | 825 | 17 | const char* remove_end = remove_str.data + remove_str.size; | 826 | | | 827 | 78 | while (remove_begin < remove_end) { | 828 | 61 | size_t byte_len, char_len; | 829 | 61 | std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length( | 830 | 61 | remove_begin, remove_end, 1); | 831 | 61 | char_lookup.insert(std::string_view(remove_begin, byte_len)); | 832 | 61 | remove_begin += byte_len; | 833 | 61 | } | 834 | | | 835 | 44 | for (size_t i = 0; i < offset_size; ++i) { | 836 | 27 | const char* str_begin = | 837 | 27 | reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]); | 838 | 27 | const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]); | 839 | 27 | const char* left_trim_pos = str_begin; | 840 | 27 | const char* right_trim_pos = str_end; | 841 | | | 842 | | if constexpr (is_ltrim) { | 843 | | while (left_trim_pos < str_end) { | 844 | | size_t byte_len, char_len; | 845 | | std::tie(byte_len, char_len) = | 846 | | simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos, | 847 | | str_end, 1); | 848 | | if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) == | 849 | | char_lookup.end()) { | 850 | | break; | 851 | | } | 852 | | left_trim_pos += byte_len; | 853 | | } | 854 | | } | 855 | | | 856 | 27 | if constexpr (is_rtrim) { | 857 | 40 | while (right_trim_pos > left_trim_pos) { | 858 | 36 | const char* 
prev_char_pos = right_trim_pos; | 859 | 66 | do { | 860 | 66 | --prev_char_pos; | 861 | 66 | } while ((*prev_char_pos & 0xC0) == 0x80); | 862 | 36 | size_t byte_len = right_trim_pos - prev_char_pos; | 863 | 36 | if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) == | 864 | 36 | char_lookup.end()) { | 865 | 23 | break; | 866 | 23 | } | 867 | 13 | right_trim_pos = prev_char_pos; | 868 | 13 | } | 869 | 27 | } | 870 | | | 871 | 27 | res_data.insert_assume_reserved(left_trim_pos, right_trim_pos); | 872 | | // The length of the result of the trim function will never exceed the length of the input. | 873 | 27 | res_offsets[i] = (ColumnString::Offset)res_data.size(); | 874 | 27 | } | 875 | 17 | return Status::OK(); | 876 | 17 | } |
|
877 | | }; |
878 | | // This is an implementation of a parameter for the Trim function. |
879 | | template <bool is_ltrim, bool is_rtrim, typename Name> |
880 | | struct Trim1Impl { |
881 | | static constexpr auto name = Name::name; |
882 | | |
883 | 157 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv Line | Count | Source | 883 | 45 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; } |
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv Line | Count | Source | 883 | 35 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; } |
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv Line | Count | Source | 883 | 41 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; } |
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv Line | Count | Source | 883 | 9 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; } |
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv Line | Count | Source | 883 | 13 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; } |
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv Line | Count | Source | 883 | 14 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; } |
|
884 | | |
885 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
886 | 138 | uint32_t result, size_t input_rows_count) { |
887 | 138 | const ColumnPtr column = block.get_by_position(arguments[0]).column; |
888 | 138 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { |
889 | 138 | auto col_res = ColumnString::create(); |
890 | 138 | char blank[] = " "; |
891 | 138 | const StringRef remove_str(blank, 1); |
892 | 138 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( |
893 | 138 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), |
894 | 138 | col_res->get_offsets()))); |
895 | 138 | block.replace_by_position(result, std::move(col_res)); |
896 | 138 | } else { |
897 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", |
898 | 0 | block.get_by_position(arguments[0]).column->get_name(), |
899 | 0 | name); |
900 | 0 | } |
901 | 138 | return Status::OK(); |
902 | 138 | } _ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 886 | 48 | uint32_t result, size_t input_rows_count) { | 887 | 48 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 888 | 48 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 889 | 48 | auto col_res = ColumnString::create(); | 890 | 48 | char blank[] = " "; | 891 | 48 | const StringRef remove_str(blank, 1); | 892 | 48 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 893 | 48 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 894 | 48 | col_res->get_offsets()))); | 895 | 48 | block.replace_by_position(result, std::move(col_res)); | 896 | 48 | } else { | 897 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 898 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 899 | 0 | name); | 900 | 0 | } | 901 | 48 | return Status::OK(); | 902 | 48 | } |
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 886 | 37 | uint32_t result, size_t input_rows_count) { | 887 | 37 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 888 | 37 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 889 | 37 | auto col_res = ColumnString::create(); | 890 | 37 | char blank[] = " "; | 891 | 37 | const StringRef remove_str(blank, 1); | 892 | 37 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 893 | 37 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 894 | 37 | col_res->get_offsets()))); | 895 | 37 | block.replace_by_position(result, std::move(col_res)); | 896 | 37 | } else { | 897 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 898 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 899 | 0 | name); | 900 | 0 | } | 901 | 37 | return Status::OK(); | 902 | 37 | } |
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 886 | 41 | uint32_t result, size_t input_rows_count) { | 887 | 41 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 888 | 41 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 889 | 41 | auto col_res = ColumnString::create(); | 890 | 41 | char blank[] = " "; | 891 | 41 | const StringRef remove_str(blank, 1); | 892 | 41 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 893 | 41 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 894 | 41 | col_res->get_offsets()))); | 895 | 41 | block.replace_by_position(result, std::move(col_res)); | 896 | 41 | } else { | 897 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 898 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 899 | 0 | name); | 900 | 0 | } | 901 | 41 | return Status::OK(); | 902 | 41 | } |
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 886 | 1 | uint32_t result, size_t input_rows_count) { | 887 | 1 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 888 | 1 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 889 | 1 | auto col_res = ColumnString::create(); | 890 | 1 | char blank[] = " "; | 891 | 1 | const StringRef remove_str(blank, 1); | 892 | 1 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 893 | 1 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 894 | 1 | col_res->get_offsets()))); | 895 | 1 | block.replace_by_position(result, std::move(col_res)); | 896 | 1 | } else { | 897 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 898 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 899 | 0 | name); | 900 | 0 | } | 901 | 1 | return Status::OK(); | 902 | 1 | } |
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 886 | 5 | uint32_t result, size_t input_rows_count) { | 887 | 5 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 888 | 5 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 889 | 5 | auto col_res = ColumnString::create(); | 890 | 5 | char blank[] = " "; | 891 | 5 | const StringRef remove_str(blank, 1); | 892 | 5 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 893 | 5 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 894 | 5 | col_res->get_offsets()))); | 895 | 5 | block.replace_by_position(result, std::move(col_res)); | 896 | 5 | } else { | 897 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 898 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 899 | 0 | name); | 900 | 0 | } | 901 | 5 | return Status::OK(); | 902 | 5 | } |
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 886 | 6 | uint32_t result, size_t input_rows_count) { | 887 | 6 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 888 | 6 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 889 | 6 | auto col_res = ColumnString::create(); | 890 | 6 | char blank[] = " "; | 891 | 6 | const StringRef remove_str(blank, 1); | 892 | 6 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 893 | 6 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 894 | 6 | col_res->get_offsets()))); | 895 | 6 | block.replace_by_position(result, std::move(col_res)); | 896 | 6 | } else { | 897 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 898 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 899 | 0 | name); | 900 | 0 | } | 901 | 6 | return Status::OK(); | 902 | 6 | } |
|
903 | | }; |
904 | | |
905 | | // Two-argument implementation of the Trim functions: the string column plus the characters to remove. |
906 | | template <bool is_ltrim, bool is_rtrim, typename Name> |
907 | | struct Trim2Impl { |
908 | | static constexpr auto name = Name::name; |
909 | | |
910 | 226 | static DataTypes get_variadic_argument_types() { |
911 | 226 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; |
912 | 226 | } _ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv Line | Count | Source | 910 | 20 | static DataTypes get_variadic_argument_types() { | 911 | 20 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 912 | 20 | } |
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv Line | Count | Source | 910 | 29 | static DataTypes get_variadic_argument_types() { | 911 | 29 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 912 | 29 | } |
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv Line | Count | Source | 910 | 84 | static DataTypes get_variadic_argument_types() { | 911 | 84 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 912 | 84 | } |
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv Line | Count | Source | 910 | 27 | static DataTypes get_variadic_argument_types() { | 911 | 27 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 912 | 27 | } |
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv Line | Count | Source | 910 | 29 | static DataTypes get_variadic_argument_types() { | 911 | 29 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 912 | 29 | } |
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv Line | Count | Source | 910 | 37 | static DataTypes get_variadic_argument_types() { | 911 | 37 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 912 | 37 | } |
|
913 | | |
914 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
915 | 282 | uint32_t result, size_t input_rows_count) { |
916 | 282 | const ColumnPtr column = block.get_by_position(arguments[0]).column; |
917 | 282 | const auto& rcol = |
918 | 282 | assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get()) |
919 | 282 | ->get_data_column_ptr(); |
920 | 282 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { |
921 | 282 | if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) { |
922 | 282 | auto col_res = ColumnString::create(); |
923 | 282 | const auto* remove_str_raw = col_right->get_chars().data(); |
924 | 282 | const ColumnString::Offset remove_str_size = col_right->get_offsets()[0]; |
925 | 282 | const StringRef remove_str(remove_str_raw, remove_str_size); |
926 | | |
927 | 282 | if (remove_str.size == 1) { |
928 | 65 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( |
929 | 65 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), |
930 | 65 | col_res->get_offsets()))); |
931 | 217 | } else { |
932 | | if constexpr (std::is_same<Name, NameTrimIn>::value || |
933 | | std::is_same<Name, NameLTrimIn>::value || |
934 | 121 | std::is_same<Name, NameRTrimIn>::value) { |
935 | 121 | RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector( |
936 | 121 | col->get_chars(), col->get_offsets(), remove_str, |
937 | 121 | col_res->get_chars(), col_res->get_offsets()))); |
938 | 121 | } else { |
939 | 96 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector( |
940 | 96 | col->get_chars(), col->get_offsets(), remove_str, |
941 | 96 | col_res->get_chars(), col_res->get_offsets()))); |
942 | 96 | } |
943 | 217 | } |
944 | 282 | block.replace_by_position(result, std::move(col_res)); |
945 | 282 | } else { |
946 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", |
947 | 0 | block.get_by_position(arguments[1]).column->get_name(), |
948 | 0 | name); |
949 | 0 | } |
950 | | |
951 | 282 | } else { |
952 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", |
953 | 0 | block.get_by_position(arguments[0]).column->get_name(), |
954 | 0 | name); |
955 | 0 | } |
956 | 282 | return Status::OK(); |
957 | 282 | } _ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 915 | 26 | uint32_t result, size_t input_rows_count) { | 916 | 26 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 917 | 26 | const auto& rcol = | 918 | 26 | assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get()) | 919 | 26 | ->get_data_column_ptr(); | 920 | 26 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 921 | 26 | if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) { | 922 | 26 | auto col_res = ColumnString::create(); | 923 | 26 | const auto* remove_str_raw = col_right->get_chars().data(); | 924 | 26 | const ColumnString::Offset remove_str_size = col_right->get_offsets()[0]; | 925 | 26 | const StringRef remove_str(remove_str_raw, remove_str_size); | 926 | | | 927 | 26 | if (remove_str.size == 1) { | 928 | 2 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 929 | 2 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 930 | 2 | col_res->get_offsets()))); | 931 | 24 | } else { | 932 | | if constexpr (std::is_same<Name, NameTrimIn>::value || | 933 | | std::is_same<Name, NameLTrimIn>::value || | 934 | | std::is_same<Name, NameRTrimIn>::value) { | 935 | | RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector( | 936 | | col->get_chars(), col->get_offsets(), remove_str, | 937 | | col_res->get_chars(), col_res->get_offsets()))); | 938 | 24 | } else { | 939 | 24 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector( | 940 | 24 | col->get_chars(), col->get_offsets(), remove_str, | 941 | 24 | col_res->get_chars(), col_res->get_offsets()))); | 942 | 24 | } | 943 | 24 | } | 944 | 26 | block.replace_by_position(result, std::move(col_res)); | 945 | 26 | } else { | 946 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 947 | 0 | 
block.get_by_position(arguments[1]).column->get_name(), | 948 | 0 | name); | 949 | 0 | } | 950 | | | 951 | 26 | } else { | 952 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 953 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 954 | 0 | name); | 955 | 0 | } | 956 | 26 | return Status::OK(); | 957 | 26 | } |
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 915 | 32 | uint32_t result, size_t input_rows_count) { | 916 | 32 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 917 | 32 | const auto& rcol = | 918 | 32 | assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get()) | 919 | 32 | ->get_data_column_ptr(); | 920 | 32 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 921 | 32 | if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) { | 922 | 32 | auto col_res = ColumnString::create(); | 923 | 32 | const auto* remove_str_raw = col_right->get_chars().data(); | 924 | 32 | const ColumnString::Offset remove_str_size = col_right->get_offsets()[0]; | 925 | 32 | const StringRef remove_str(remove_str_raw, remove_str_size); | 926 | | | 927 | 32 | if (remove_str.size == 1) { | 928 | 5 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 929 | 5 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 930 | 5 | col_res->get_offsets()))); | 931 | 27 | } else { | 932 | | if constexpr (std::is_same<Name, NameTrimIn>::value || | 933 | | std::is_same<Name, NameLTrimIn>::value || | 934 | | std::is_same<Name, NameRTrimIn>::value) { | 935 | | RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector( | 936 | | col->get_chars(), col->get_offsets(), remove_str, | 937 | | col_res->get_chars(), col_res->get_offsets()))); | 938 | 27 | } else { | 939 | 27 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector( | 940 | 27 | col->get_chars(), col->get_offsets(), remove_str, | 941 | 27 | col_res->get_chars(), col_res->get_offsets()))); | 942 | 27 | } | 943 | 27 | } | 944 | 32 | block.replace_by_position(result, std::move(col_res)); | 945 | 32 | } else { | 946 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 947 | 0 | 
block.get_by_position(arguments[1]).column->get_name(), | 948 | 0 | name); | 949 | 0 | } | 950 | | | 951 | 32 | } else { | 952 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 953 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 954 | 0 | name); | 955 | 0 | } | 956 | 32 | return Status::OK(); | 957 | 32 | } |
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 915 | 85 | uint32_t result, size_t input_rows_count) { | 916 | 85 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 917 | 85 | const auto& rcol = | 918 | 85 | assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get()) | 919 | 85 | ->get_data_column_ptr(); | 920 | 85 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 921 | 85 | if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) { | 922 | 85 | auto col_res = ColumnString::create(); | 923 | 85 | const auto* remove_str_raw = col_right->get_chars().data(); | 924 | 85 | const ColumnString::Offset remove_str_size = col_right->get_offsets()[0]; | 925 | 85 | const StringRef remove_str(remove_str_raw, remove_str_size); | 926 | | | 927 | 85 | if (remove_str.size == 1) { | 928 | 40 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 929 | 40 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 930 | 40 | col_res->get_offsets()))); | 931 | 45 | } else { | 932 | | if constexpr (std::is_same<Name, NameTrimIn>::value || | 933 | | std::is_same<Name, NameLTrimIn>::value || | 934 | | std::is_same<Name, NameRTrimIn>::value) { | 935 | | RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector( | 936 | | col->get_chars(), col->get_offsets(), remove_str, | 937 | | col_res->get_chars(), col_res->get_offsets()))); | 938 | 45 | } else { | 939 | 45 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector( | 940 | 45 | col->get_chars(), col->get_offsets(), remove_str, | 941 | 45 | col_res->get_chars(), col_res->get_offsets()))); | 942 | 45 | } | 943 | 45 | } | 944 | 85 | block.replace_by_position(result, std::move(col_res)); | 945 | 85 | } else { | 946 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 947 | 0 | 
block.get_by_position(arguments[1]).column->get_name(), | 948 | 0 | name); | 949 | 0 | } | 950 | | | 951 | 85 | } else { | 952 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 953 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 954 | 0 | name); | 955 | 0 | } | 956 | 85 | return Status::OK(); | 957 | 85 | } |
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 915 | 50 | uint32_t result, size_t input_rows_count) { | 916 | 50 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 917 | 50 | const auto& rcol = | 918 | 50 | assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get()) | 919 | 50 | ->get_data_column_ptr(); | 920 | 50 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 921 | 50 | if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) { | 922 | 50 | auto col_res = ColumnString::create(); | 923 | 50 | const auto* remove_str_raw = col_right->get_chars().data(); | 924 | 50 | const ColumnString::Offset remove_str_size = col_right->get_offsets()[0]; | 925 | 50 | const StringRef remove_str(remove_str_raw, remove_str_size); | 926 | | | 927 | 50 | if (remove_str.size == 1) { | 928 | 7 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 929 | 7 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 930 | 7 | col_res->get_offsets()))); | 931 | 43 | } else { | 932 | | if constexpr (std::is_same<Name, NameTrimIn>::value || | 933 | | std::is_same<Name, NameLTrimIn>::value || | 934 | 43 | std::is_same<Name, NameRTrimIn>::value) { | 935 | 43 | RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector( | 936 | 43 | col->get_chars(), col->get_offsets(), remove_str, | 937 | 43 | col_res->get_chars(), col_res->get_offsets()))); | 938 | | } else { | 939 | | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector( | 940 | | col->get_chars(), col->get_offsets(), remove_str, | 941 | | col_res->get_chars(), col_res->get_offsets()))); | 942 | | } | 943 | 43 | } | 944 | 50 | block.replace_by_position(result, std::move(col_res)); | 945 | 50 | } else { | 946 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 947 | 0 | 
block.get_by_position(arguments[1]).column->get_name(), | 948 | 0 | name); | 949 | 0 | } | 950 | | | 951 | 50 | } else { | 952 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 953 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 954 | 0 | name); | 955 | 0 | } | 956 | 50 | return Status::OK(); | 957 | 50 | } |
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 915 | 41 | uint32_t result, size_t input_rows_count) { | 916 | 41 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 917 | 41 | const auto& rcol = | 918 | 41 | assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get()) | 919 | 41 | ->get_data_column_ptr(); | 920 | 41 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 921 | 41 | if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) { | 922 | 41 | auto col_res = ColumnString::create(); | 923 | 41 | const auto* remove_str_raw = col_right->get_chars().data(); | 924 | 41 | const ColumnString::Offset remove_str_size = col_right->get_offsets()[0]; | 925 | 41 | const StringRef remove_str(remove_str_raw, remove_str_size); | 926 | | | 927 | 41 | if (remove_str.size == 1) { | 928 | 5 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 929 | 5 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 930 | 5 | col_res->get_offsets()))); | 931 | 36 | } else { | 932 | | if constexpr (std::is_same<Name, NameTrimIn>::value || | 933 | | std::is_same<Name, NameLTrimIn>::value || | 934 | 36 | std::is_same<Name, NameRTrimIn>::value) { | 935 | 36 | RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector( | 936 | 36 | col->get_chars(), col->get_offsets(), remove_str, | 937 | 36 | col_res->get_chars(), col_res->get_offsets()))); | 938 | | } else { | 939 | | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector( | 940 | | col->get_chars(), col->get_offsets(), remove_str, | 941 | | col_res->get_chars(), col_res->get_offsets()))); | 942 | | } | 943 | 36 | } | 944 | 41 | block.replace_by_position(result, std::move(col_res)); | 945 | 41 | } else { | 946 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 947 | 0 | 
block.get_by_position(arguments[1]).column->get_name(), | 948 | 0 | name); | 949 | 0 | } | 950 | | | 951 | 41 | } else { | 952 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 953 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 954 | 0 | name); | 955 | 0 | } | 956 | 41 | return Status::OK(); | 957 | 41 | } |
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 915 | 48 | uint32_t result, size_t input_rows_count) { | 916 | 48 | const ColumnPtr column = block.get_by_position(arguments[0]).column; | 917 | 48 | const auto& rcol = | 918 | 48 | assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get()) | 919 | 48 | ->get_data_column_ptr(); | 920 | 48 | if (const auto* col = assert_cast<const ColumnString*>(column.get())) { | 921 | 48 | if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) { | 922 | 48 | auto col_res = ColumnString::create(); | 923 | 48 | const auto* remove_str_raw = col_right->get_chars().data(); | 924 | 48 | const ColumnString::Offset remove_str_size = col_right->get_offsets()[0]; | 925 | 48 | const StringRef remove_str(remove_str_raw, remove_str_size); | 926 | | | 927 | 48 | if (remove_str.size == 1) { | 928 | 6 | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector( | 929 | 6 | col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(), | 930 | 6 | col_res->get_offsets()))); | 931 | 42 | } else { | 932 | | if constexpr (std::is_same<Name, NameTrimIn>::value || | 933 | | std::is_same<Name, NameLTrimIn>::value || | 934 | 42 | std::is_same<Name, NameRTrimIn>::value) { | 935 | 42 | RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector( | 936 | 42 | col->get_chars(), col->get_offsets(), remove_str, | 937 | 42 | col_res->get_chars(), col_res->get_offsets()))); | 938 | | } else { | 939 | | RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector( | 940 | | col->get_chars(), col->get_offsets(), remove_str, | 941 | | col_res->get_chars(), col_res->get_offsets()))); | 942 | | } | 943 | 42 | } | 944 | 48 | block.replace_by_position(result, std::move(col_res)); | 945 | 48 | } else { | 946 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 947 | 0 | 
block.get_by_position(arguments[1]).column->get_name(), | 948 | 0 | name); | 949 | 0 | } | 950 | | | 951 | 48 | } else { | 952 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 953 | 0 | block.get_by_position(arguments[0]).column->get_name(), | 954 | 0 | name); | 955 | 0 | } | 956 | 48 | return Status::OK(); | 957 | 48 | } |
|
958 | | }; |
959 | | |
960 | | template <typename impl> |
961 | | class FunctionTrim : public IFunction { |
962 | | public: |
963 | | static constexpr auto name = impl::name; |
964 | 395 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE6createEv Line | Count | Source | 964 | 46 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE6createEv Line | Count | Source | 964 | 36 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE6createEv Line | Count | Source | 964 | 42 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE6createEv Line | Count | Source | 964 | 21 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE6createEv Line | Count | Source | 964 | 30 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE6createEv Line | Count | Source | 964 | 85 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE6createEv Line | Count | Source | 964 | 10 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv Line | Count | Source | 964 | 14 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv Line | Count | Source | 964 | 15 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE6createEv Line | Count | Source | 964 | 28 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv Line | Count | Source | 964 | 30 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv Line | Count | Source | 964 | 38 | static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); } |
|
965 | 12 | String get_name() const override { return impl::name; }_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev Line | Count | Source | 965 | 1 | String get_name() const override { return impl::name; } |
|
966 | | |
967 | 287 | size_t get_number_of_arguments() const override { |
968 | 287 | return get_variadic_argument_types_impl().size(); |
969 | 287 | } _ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 37 | size_t get_number_of_arguments() const override { | 968 | 37 | return get_variadic_argument_types_impl().size(); | 969 | 37 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 27 | size_t get_number_of_arguments() const override { | 968 | 27 | return get_variadic_argument_types_impl().size(); | 969 | 27 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 33 | size_t get_number_of_arguments() const override { | 968 | 33 | return get_variadic_argument_types_impl().size(); | 969 | 33 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 12 | size_t get_number_of_arguments() const override { | 968 | 12 | return get_variadic_argument_types_impl().size(); | 969 | 12 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 21 | size_t get_number_of_arguments() const override { | 968 | 21 | return get_variadic_argument_types_impl().size(); | 969 | 21 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 76 | size_t get_number_of_arguments() const override { | 968 | 76 | return get_variadic_argument_types_impl().size(); | 969 | 76 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 1 | size_t get_number_of_arguments() const override { | 968 | 1 | return get_variadic_argument_types_impl().size(); | 969 | 1 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 5 | size_t get_number_of_arguments() const override { | 968 | 5 | return get_variadic_argument_types_impl().size(); | 969 | 5 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 6 | size_t get_number_of_arguments() const override { | 968 | 6 | return get_variadic_argument_types_impl().size(); | 969 | 6 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 19 | size_t get_number_of_arguments() const override { | 968 | 19 | return get_variadic_argument_types_impl().size(); | 969 | 19 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 21 | size_t get_number_of_arguments() const override { | 968 | 21 | return get_variadic_argument_types_impl().size(); | 969 | 21 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv Line | Count | Source | 967 | 29 | size_t get_number_of_arguments() const override { | 968 | 29 | return get_variadic_argument_types_impl().size(); | 969 | 29 | } |
|
970 | | |
971 | 287 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
972 | 287 | if (!is_string_type(arguments[0]->get_primitive_type())) { |
973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, |
974 | 0 | "Illegal type {} of argument of function {}", |
975 | 0 | arguments[0]->get_name(), get_name()); |
976 | 0 | } |
977 | 287 | return arguments[0]; |
978 | 287 | } _ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 37 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 37 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 37 | return arguments[0]; | 978 | 37 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 27 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 27 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 27 | return arguments[0]; | 978 | 27 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 33 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 33 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 33 | return arguments[0]; | 978 | 33 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 12 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 12 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 12 | return arguments[0]; | 978 | 12 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 21 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 21 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 21 | return arguments[0]; | 978 | 21 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 76 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 76 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 76 | return arguments[0]; | 978 | 76 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 1 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 1 | return arguments[0]; | 978 | 1 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 5 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 5 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 5 | return arguments[0]; | 978 | 5 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 6 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 6 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 6 | return arguments[0]; | 978 | 6 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 19 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 19 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 19 | return arguments[0]; | 978 | 19 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 21 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 21 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 21 | return arguments[0]; | 978 | 21 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 971 | 29 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 972 | 29 | if (!is_string_type(arguments[0]->get_primitive_type())) { | 973 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 974 | 0 | "Illegal type {} of argument of function {}", | 975 | 0 | arguments[0]->get_name(), get_name()); | 976 | 0 | } | 977 | 29 | return arguments[0]; | 978 | 29 | } |
|
979 | | // The second parameter of "trim" is a constant. |
980 | 570 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 85 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 58 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 64 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 37 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 41 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 96 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 1 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 5 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 6 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 67 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 51 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv Line | Count | Source | 980 | 59 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
|
981 | | |
982 | 383 | DataTypes get_variadic_argument_types_impl() const override { |
983 | 383 | return impl::get_variadic_argument_types(); |
984 | 383 | } _ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 45 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 45 | return impl::get_variadic_argument_types(); | 984 | 45 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 35 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 35 | return impl::get_variadic_argument_types(); | 984 | 35 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 41 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 41 | return impl::get_variadic_argument_types(); | 984 | 41 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 20 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 20 | return impl::get_variadic_argument_types(); | 984 | 20 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 29 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 29 | return impl::get_variadic_argument_types(); | 984 | 29 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 84 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 84 | return impl::get_variadic_argument_types(); | 984 | 84 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 9 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 9 | return impl::get_variadic_argument_types(); | 984 | 9 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 13 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 13 | return impl::get_variadic_argument_types(); | 984 | 13 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 14 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 14 | return impl::get_variadic_argument_types(); | 984 | 14 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 27 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 27 | return impl::get_variadic_argument_types(); | 984 | 27 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 29 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 29 | return impl::get_variadic_argument_types(); | 984 | 29 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv Line | Count | Source | 982 | 37 | DataTypes get_variadic_argument_types_impl() const override { | 983 | 37 | return impl::get_variadic_argument_types(); | 984 | 37 | } |
|
985 | | |
986 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
987 | 420 | uint32_t result, size_t input_rows_count) const override { |
988 | 420 | return impl::execute(context, block, arguments, result, input_rows_count); |
989 | 420 | } _ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 48 | uint32_t result, size_t input_rows_count) const override { | 988 | 48 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 48 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 37 | uint32_t result, size_t input_rows_count) const override { | 988 | 37 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 37 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 41 | uint32_t result, size_t input_rows_count) const override { | 988 | 41 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 41 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 26 | uint32_t result, size_t input_rows_count) const override { | 988 | 26 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 26 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 32 | uint32_t result, size_t input_rows_count) const override { | 988 | 32 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 32 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 85 | uint32_t result, size_t input_rows_count) const override { | 988 | 85 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 85 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 1 | uint32_t result, size_t input_rows_count) const override { | 988 | 1 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 1 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 5 | uint32_t result, size_t input_rows_count) const override { | 988 | 5 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 5 | } |
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 6 | uint32_t result, size_t input_rows_count) const override { | 988 | 6 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 6 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 50 | uint32_t result, size_t input_rows_count) const override { | 988 | 50 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 50 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 41 | uint32_t result, size_t input_rows_count) const override { | 988 | 41 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 41 | } |
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 987 | 48 | uint32_t result, size_t input_rows_count) const override { | 988 | 48 | return impl::execute(context, block, arguments, result, input_rows_count); | 989 | 48 | } |
|
990 | | }; |
991 | | |
// Name tag carrying the function name "unhex".
struct UnHexImplEmpty {
    static constexpr const char* name = "unhex";
};
995 | | |
// Name tag carrying the function name "unhex_null".
struct UnHexImplNull {
    static constexpr const char* name = "unhex_null";
};
999 | | |
1000 | | template <typename Name> |
1001 | | struct UnHexImpl { |
1002 | | static constexpr auto name = Name::name; |
1003 | | using ReturnType = DataTypeString; |
1004 | | using ColumnType = ColumnString; |
1005 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING; |
1006 | | |
1007 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
1008 | 160 | ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) { |
1009 | 160 | auto rows_count = offsets.size(); |
1010 | 160 | dst_offsets.resize(rows_count); |
1011 | | |
1012 | 160 | int64_t total_size = 0; |
1013 | 368 | for (size_t i = 0; i < rows_count; i++) { |
1014 | 208 | size_t len = offsets[i] - offsets[i - 1]; |
1015 | 208 | total_size += len / 2; |
1016 | 208 | } |
1017 | 160 | ColumnString::check_chars_length(total_size, rows_count); |
1018 | 160 | dst_data.resize(total_size); |
1019 | 160 | char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data()); |
1020 | 160 | size_t offset = 0; |
1021 | | |
1022 | 368 | for (int i = 0; i < rows_count; ++i) { |
1023 | 208 | const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
1024 | 208 | ColumnString::Offset srclen = offsets[i] - offsets[i - 1]; |
1025 | | |
1026 | 208 | if (UNLIKELY(srclen == 0)) { |
1027 | 13 | dst_offsets[i] = cast_set<uint32_t>(offset); |
1028 | 13 | continue; |
1029 | 13 | } |
1030 | | |
1031 | 195 | int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset); |
1032 | | |
1033 | 195 | offset += outlen; |
1034 | 195 | dst_offsets[i] = cast_set<uint32_t>(offset); |
1035 | 195 | } |
1036 | 160 | dst_data.pop_back(total_size - offset); |
1037 | 160 | return Status::OK(); |
1038 | 160 | } |
1039 | | |
1040 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
1041 | | ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets, |
1042 | 33 | ColumnUInt8::Container* null_map_data) { |
1043 | 33 | auto rows_count = offsets.size(); |
1044 | 33 | dst_offsets.resize(rows_count); |
1045 | | |
1046 | 33 | int64_t total_size = 0; |
1047 | 84 | for (size_t i = 0; i < rows_count; i++) { |
1048 | 51 | size_t len = offsets[i] - offsets[i - 1]; |
1049 | 51 | total_size += len / 2; |
1050 | 51 | } |
1051 | 33 | ColumnString::check_chars_length(total_size, rows_count); |
1052 | 33 | dst_data.resize(total_size); |
1053 | 33 | char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data()); |
1054 | 33 | size_t offset = 0; |
1055 | | |
1056 | 84 | for (int i = 0; i < rows_count; ++i) { |
1057 | 51 | const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
1058 | 51 | ColumnString::Offset srclen = offsets[i] - offsets[i - 1]; |
1059 | | |
1060 | 51 | if (UNLIKELY(srclen == 0)) { |
1061 | 7 | (*null_map_data)[i] = 1; |
1062 | 7 | dst_offsets[i] = cast_set<uint32_t>(offset); |
1063 | 7 | continue; |
1064 | 7 | } |
1065 | | |
1066 | 44 | int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset); |
1067 | | |
1068 | 44 | if (outlen == 0) { |
1069 | 13 | (*null_map_data)[i] = 1; |
1070 | 13 | dst_offsets[i] = cast_set<uint32_t>(offset); |
1071 | 13 | continue; |
1072 | 13 | } |
1073 | | |
1074 | 31 | offset += outlen; |
1075 | 31 | dst_offsets[i] = cast_set<uint32_t>(offset); |
1076 | 31 | } |
1077 | 33 | dst_data.pop_back(total_size - offset); |
1078 | 33 | return Status::OK(); |
1079 | 33 | } |
1080 | | }; |
1081 | | |
// Name tag carrying the SQL-visible function name "space".
struct NameStringSpace {
    static constexpr const char* name = "space";
};
1085 | | |
1086 | | struct StringSpace { |
1087 | | using ReturnType = DataTypeString; |
1088 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_INT; |
1089 | | using Type = Int32; |
1090 | | using ReturnColumnType = ColumnString; |
1091 | | |
1092 | | static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data, |
1093 | 10 | ColumnString::Offsets& res_offsets) { |
1094 | 10 | res_offsets.resize(data.size()); |
1095 | 10 | size_t input_size = res_offsets.size(); |
1096 | 10 | int64_t total_size = 0; |
1097 | 34 | for (size_t i = 0; i < input_size; ++i) { |
1098 | 24 | if (data[i] > 0) { |
1099 | 14 | total_size += data[i]; |
1100 | 14 | } |
1101 | 24 | } |
1102 | 10 | ColumnString::check_chars_length(total_size, input_size); |
1103 | 10 | res_data.reserve(total_size); |
1104 | | |
1105 | 34 | for (size_t i = 0; i < input_size; ++i) { |
1106 | 24 | if (data[i] > 0) [[likely]] { |
1107 | 14 | res_data.resize_fill(res_data.size() + data[i], ' '); |
1108 | 14 | cast_set(res_offsets[i], res_data.size()); |
1109 | 14 | } else { |
1110 | 10 | StringOP::push_empty_string(i, res_data, res_offsets); |
1111 | 10 | } |
1112 | 24 | } |
1113 | 10 | return Status::OK(); |
1114 | 10 | } |
1115 | | }; |
1116 | | |
1117 | | struct ToBase64Impl { |
1118 | | static constexpr auto name = "to_base64"; |
1119 | | using ReturnType = DataTypeString; |
1120 | | using ColumnType = ColumnString; |
1121 | | static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING; |
1122 | | |
1123 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
1124 | 107 | ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) { |
1125 | 107 | auto rows_count = offsets.size(); |
1126 | 107 | dst_offsets.resize(rows_count); |
1127 | | |
1128 | 107 | size_t total_size = 0; |
1129 | 250 | for (size_t i = 0; i < rows_count; i++) { |
1130 | 143 | size_t len = offsets[i] - offsets[i - 1]; |
1131 | 143 | total_size += 4 * ((len + 2) / 3); |
1132 | 143 | } |
1133 | 107 | ColumnString::check_chars_length(total_size, rows_count); |
1134 | 107 | dst_data.resize(total_size); |
1135 | 107 | auto* dst_data_ptr = dst_data.data(); |
1136 | 107 | size_t offset = 0; |
1137 | | |
1138 | 250 | for (int i = 0; i < rows_count; ++i) { |
1139 | 143 | const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
1140 | 143 | size_t srclen = offsets[i] - offsets[i - 1]; |
1141 | | |
1142 | 143 | if (UNLIKELY(srclen == 0)) { |
1143 | 7 | dst_offsets[i] = cast_set<uint32_t>(offset); |
1144 | 7 | continue; |
1145 | 7 | } |
1146 | | |
1147 | 136 | auto outlen = doris::base64_encode((const unsigned char*)source, srclen, |
1148 | 136 | (unsigned char*)(dst_data_ptr + offset)); |
1149 | | |
1150 | 136 | offset += outlen; |
1151 | 136 | dst_offsets[i] = cast_set<uint32_t>(offset); |
1152 | 136 | } |
1153 | 107 | dst_data.pop_back(total_size - offset); |
1154 | 107 | return Status::OK(); |
1155 | 107 | } |
1156 | | }; |
1157 | | |
1158 | | struct FromBase64Impl { |
1159 | | static constexpr auto name = "from_base64"; |
1160 | | using ReturnType = DataTypeString; |
1161 | | using ColumnType = ColumnString; |
1162 | | |
1163 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
1164 | | ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets, |
1165 | 117 | NullMap& null_map) { |
1166 | 117 | auto rows_count = offsets.size(); |
1167 | 117 | dst_offsets.resize(rows_count); |
1168 | | |
1169 | 117 | size_t total_size = 0; |
1170 | 287 | for (size_t i = 0; i < rows_count; i++) { |
1171 | 170 | auto len = offsets[i] - offsets[i - 1]; |
1172 | 170 | total_size += len / 4 * 3; |
1173 | 170 | } |
1174 | 117 | ColumnString::check_chars_length(total_size, rows_count); |
1175 | 117 | dst_data.resize(total_size); |
1176 | 117 | char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data()); |
1177 | 117 | size_t offset = 0; |
1178 | | |
1179 | 287 | for (int i = 0; i < rows_count; ++i) { |
1180 | 170 | if (UNLIKELY(null_map[i])) { |
1181 | 0 | null_map[i] = 1; |
1182 | 0 | dst_offsets[i] = cast_set<uint32_t>(offset); |
1183 | 0 | continue; |
1184 | 0 | } |
1185 | | |
1186 | 170 | const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
1187 | 170 | ColumnString::Offset srclen = offsets[i] - offsets[i - 1]; |
1188 | | |
1189 | 170 | if (UNLIKELY(srclen == 0)) { |
1190 | 6 | dst_offsets[i] = cast_set<uint32_t>(offset); |
1191 | 6 | continue; |
1192 | 6 | } |
1193 | | |
1194 | 164 | auto outlen = base64_decode(source, srclen, dst_data_ptr + offset); |
1195 | | |
1196 | 164 | if (outlen < 0) { |
1197 | 60 | null_map[i] = 1; |
1198 | 60 | dst_offsets[i] = cast_set<uint32_t>(offset); |
1199 | 104 | } else { |
1200 | 104 | offset += outlen; |
1201 | 104 | dst_offsets[i] = cast_set<uint32_t>(offset); |
1202 | 104 | } |
1203 | 164 | } |
1204 | 117 | dst_data.pop_back(total_size - offset); |
1205 | 117 | return Status::OK(); |
1206 | 117 | } |
1207 | | }; |
1208 | | |
1209 | | struct StringAppendTrailingCharIfAbsent { |
1210 | | static constexpr auto name = "append_trailing_char_if_absent"; |
1211 | | using Chars = ColumnString::Chars; |
1212 | | using Offsets = ColumnString::Offsets; |
1213 | | using ReturnType = DataTypeString; |
1214 | | using ColumnType = ColumnString; |
1215 | | |
1216 | 48 | static bool str_end_with(const StringRef& str, const StringRef& end) { |
1217 | 48 | if (str.size < end.size) { |
1218 | 11 | return false; |
1219 | 11 | } |
1220 | | // The end_with method of StringRef needs to ensure that the size of end is less than or equal to the size of str. |
1221 | 37 | return str.end_with(end); |
1222 | 48 | } |
1223 | | |
1224 | | static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets, |
1225 | | const Chars& rdata, const Offsets& roffsets, Chars& res_data, |
1226 | 56 | Offsets& res_offsets, NullMap& null_map_data) { |
1227 | 56 | DCHECK_EQ(loffsets.size(), roffsets.size()); |
1228 | 56 | size_t input_rows_count = loffsets.size(); |
1229 | 56 | res_offsets.resize(input_rows_count); |
1230 | 56 | fmt::memory_buffer buffer; |
1231 | | |
1232 | 158 | for (size_t i = 0; i < input_rows_count; ++i) { |
1233 | 102 | buffer.clear(); |
1234 | | |
1235 | 102 | StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]), |
1236 | 102 | loffsets[i] - loffsets[i - 1]); |
1237 | 102 | StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]), |
1238 | 102 | roffsets[i] - roffsets[i - 1]); |
1239 | | // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters. |
1240 | 102 | auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length( |
1241 | 102 | rstr.begin(), rstr.end(), 2); |
1242 | | |
1243 | 102 | if (char_len != 1) { |
1244 | 66 | StringOP::push_null_string(i, res_data, res_offsets, null_map_data); |
1245 | 66 | continue; |
1246 | 66 | } |
1247 | 36 | if (str_end_with(lstr, rstr)) { |
1248 | 9 | StringOP::push_value_string(lstr, i, res_data, res_offsets); |
1249 | 9 | continue; |
1250 | 9 | } |
1251 | | |
1252 | 27 | buffer.append(lstr.begin(), lstr.end()); |
1253 | 27 | buffer.append(rstr.begin(), rstr.end()); |
1254 | 27 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
1255 | 27 | res_offsets); |
1256 | 27 | } |
1257 | 56 | } |
1258 | | static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets, |
1259 | | const StringRef& rstr, Chars& res_data, Offsets& res_offsets, |
1260 | 8 | NullMap& null_map_data) { |
1261 | 8 | size_t input_rows_count = loffsets.size(); |
1262 | 8 | res_offsets.resize(input_rows_count); |
1263 | 8 | fmt::memory_buffer buffer; |
1264 | | // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters. |
1265 | 8 | auto [byte_len, char_len] = |
1266 | 8 | simd::VStringFunctions::iterate_utf8_with_limit_length(rstr.begin(), rstr.end(), 2); |
1267 | 8 | if (char_len != 1) { |
1268 | 4 | for (size_t i = 0; i < input_rows_count; ++i) { |
1269 | 2 | StringOP::push_null_string(i, res_data, res_offsets, null_map_data); |
1270 | 2 | } |
1271 | 2 | return; |
1272 | 2 | } |
1273 | | |
1274 | 12 | for (size_t i = 0; i < input_rows_count; ++i) { |
1275 | 6 | buffer.clear(); |
1276 | 6 | StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]), |
1277 | 6 | loffsets[i] - loffsets[i - 1]); |
1278 | | |
1279 | 6 | if (str_end_with(lstr, rstr)) { |
1280 | 2 | StringOP::push_value_string(lstr, i, res_data, res_offsets); |
1281 | 2 | continue; |
1282 | 2 | } |
1283 | | |
1284 | 4 | buffer.append(lstr.begin(), lstr.end()); |
1285 | 4 | buffer.append(rstr.begin(), rstr.end()); |
1286 | 4 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
1287 | 4 | res_offsets); |
1288 | 4 | } |
1289 | 6 | } |
1290 | | static void scalar_vector(FunctionContext* context, const StringRef& lstr, const Chars& rdata, |
1291 | | const Offsets& roffsets, Chars& res_data, Offsets& res_offsets, |
1292 | 8 | NullMap& null_map_data) { |
1293 | 8 | size_t input_rows_count = roffsets.size(); |
1294 | 8 | res_offsets.resize(input_rows_count); |
1295 | 8 | fmt::memory_buffer buffer; |
1296 | | |
1297 | 16 | for (size_t i = 0; i < input_rows_count; ++i) { |
1298 | 8 | buffer.clear(); |
1299 | | |
1300 | 8 | StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]), |
1301 | 8 | roffsets[i] - roffsets[i - 1]); |
1302 | | // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters. |
1303 | 8 | auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length( |
1304 | 8 | rstr.begin(), rstr.end(), 2); |
1305 | | |
1306 | 8 | if (char_len != 1) { |
1307 | 2 | StringOP::push_null_string(i, res_data, res_offsets, null_map_data); |
1308 | 2 | continue; |
1309 | 2 | } |
1310 | 6 | if (str_end_with(lstr, rstr)) { |
1311 | 2 | StringOP::push_value_string(lstr, i, res_data, res_offsets); |
1312 | 2 | continue; |
1313 | 2 | } |
1314 | | |
1315 | 4 | buffer.append(lstr.begin(), lstr.end()); |
1316 | 4 | buffer.append(rstr.begin(), rstr.end()); |
1317 | 4 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
1318 | 4 | res_offsets); |
1319 | 4 | } |
1320 | 8 | } |
1321 | | }; |
1322 | | |
// Policy tag for the "lpad" function: selects the left-padding variant.
struct StringLPad {
    static constexpr const char* name = "lpad";
    static constexpr bool is_lpad = true;
};
1327 | | |
// Policy tag for the "rpad" function: selects the non-left (right) padding variant.
struct StringRPad {
    static constexpr const char* name = "rpad";
    static constexpr bool is_lpad = false;
};
1332 | | |
// StringFunctionImpl instantiated with StartsWithOp.
template <typename LeftDataType, typename RightDataType>
using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;

// StringFunctionImpl instantiated with EndsWithOp.
template <typename LeftDataType, typename RightDataType>
using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;

// StringFunctionImpl instantiated with FindInSetOp.
template <typename LeftDataType, typename RightDataType>
using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
1341 | | |
// Function types ready for registration: each alias pairs an implementation struct with its
// name tag through FunctionUnaryToType. They are registered in register_function_string().
using FunctionStringParseDataSize = FunctionUnaryToType<ParseDataSize, NameParseDataSize>;
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLength>;
using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
using FunctionIsValidUTF8 = FunctionUnaryToType<IsValidUTF8Impl, NameIsValidUTF8>;
1350 | | |
1351 | | namespace { |
1352 | | |
1353 | 3.45k | ZoneMapFilterResult unsupported_starts_with_zonemap(const ZoneMapEvalContext& ctx) { |
1354 | 3.45k | ++ctx.stats.unsupported_expr_count; |
1355 | 3.45k | return ZoneMapFilterResult::kUnsupported; |
1356 | 3.45k | } |
1357 | | |
1358 | 56 | bool field_less_for_starts_with_zonemap(const Field& lhs, const Field& rhs) { |
1359 | 56 | return (lhs <=> rhs) == std::strong_ordering::less; |
1360 | 56 | } |
1361 | | |
1362 | 56 | Field string_field_for_starts_with_zonemap(std::string_view value) { |
1363 | 56 | return Field::create_field<TYPE_STRING>(std::string(value)); |
1364 | 56 | } |
1365 | | |
// Returns the smallest byte string that is greater than every string starting with `prefix`,
// or std::nullopt when no such string exists (empty prefix, or a prefix made only of 0xFF
// bytes).
//
// ZoneMap string bounds are compared by bytewise Field ordering. For starts_with(s, p),
// the safe upper bound is the next byte string after p: p <= s < next_prefix(p).
// The bound is built by dropping trailing 0xFF bytes (they cannot be incremented) and adding
// one to the last remaining byte.
std::optional<std::string> next_prefix_for_starts_with_zonemap(std::string_view prefix) {
    constexpr char kMaxByte = static_cast<char>(std::numeric_limits<unsigned char>::max());

    // Position of the last byte that can still be incremented.
    const auto last_incrementable = prefix.find_last_not_of(kMaxByte);
    if (last_incrementable == std::string_view::npos) {
        return std::nullopt;
    }

    // Copy only the bytes that survive the truncation, then bump the final one.
    std::string upper(prefix.substr(0, last_incrementable + 1));
    upper.back() = static_cast<char>(static_cast<unsigned char>(upper.back()) + 1);
    return upper;
}
1380 | | |
1381 | 3.55k | bool supports_starts_with_zonemap_slot_type(const DataTypePtr& data_type) { |
1382 | 3.55k | if (data_type == nullptr) { |
1383 | 0 | return false; |
1384 | 0 | } |
1385 | 3.55k | auto primitive_type = remove_nullable(data_type)->get_primitive_type(); |
1386 | 3.65k | return primitive_type != TYPE_CHAR && is_string_type(primitive_type); |
1387 | 3.55k | } |
1388 | | |
1389 | | ZoneMapFilterResult evaluate_starts_with_zonemap(const ZoneMapEvalContext& ctx, |
1390 | 3.46k | const VExprSPtrs& arguments) { |
1391 | 3.46k | auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments); |
1392 | 3.47k | if (!slot_literal.has_value() || slot_literal->literal_on_left) { |
1393 | 0 | return unsupported_starts_with_zonemap(ctx); |
1394 | 0 | } |
1395 | 3.46k | if (slot_literal->literal.is_null()) { |
1396 | 0 | return ZoneMapFilterResult::kNoMatch; |
1397 | 0 | } |
1398 | 3.46k | const auto* slot_type = ctx.data_type(slot_literal->slot_index); |
1399 | 3.57k | if (slot_type == nullptr || *slot_type == nullptr || slot_literal->literal_type == nullptr) { |
1400 | 0 | return unsupported_starts_with_zonemap(ctx); |
1401 | 0 | } |
1402 | 3.46k | if (!supports_starts_with_zonemap_slot_type(*slot_type) || |
1403 | 3.58k | !is_string_type(remove_nullable(slot_literal->literal_type)->get_primitive_type())) { |
1404 | 0 | ++ctx.stats.type_mismatch_count; |
1405 | 0 | return unsupported_starts_with_zonemap(ctx); |
1406 | 0 | } |
1407 | 3.46k | auto zone_map_ref = expr_zonemap::fetch_zone_map(ctx, slot_literal->slot_index); |
1408 | 3.46k | if (zone_map_ref == nullptr) { |
1409 | 0 | return ZoneMapFilterResult::kUnsupported; |
1410 | 0 | } |
1411 | 3.46k | const auto& zone_map = *zone_map_ref; |
1412 | 3.46k | if (!zone_map.has_not_null) { |
1413 | 1 | return ZoneMapFilterResult::kNoMatch; |
1414 | 1 | } |
1415 | 3.46k | if (!expr_zonemap::range_stats_usable_for_zonemap(zone_map, *slot_type)) { |
1416 | 3.46k | return unsupported_starts_with_zonemap(ctx); |
1417 | 3.46k | } |
1418 | | |
1419 | 7 | const auto prefix = slot_literal->literal.as_string_view(); |
1420 | 7 | if (prefix.empty()) { |
1421 | 0 | return ZoneMapFilterResult::kMayMatch; |
1422 | 0 | } |
1423 | 7 | auto lower = string_field_for_starts_with_zonemap(prefix); |
1424 | 20 | if (field_less_for_starts_with_zonemap(zone_map.max_value, lower)) { |
1425 | 20 | return ZoneMapFilterResult::kNoMatch; |
1426 | 20 | } |
1427 | 18.4E | auto upper_prefix = next_prefix_for_starts_with_zonemap(prefix); |
1428 | 18.4E | if (upper_prefix.has_value() && |
1429 | 18.4E | !field_less_for_starts_with_zonemap(zone_map.min_value, |
1430 | 18 | string_field_for_starts_with_zonemap(*upper_prefix))) { |
1431 | 1 | return ZoneMapFilterResult::kNoMatch; |
1432 | 1 | } |
1433 | 18.4E | return ZoneMapFilterResult::kMayMatch; |
1434 | 18.4E | } |
1435 | | |
1436 | 36 | bool can_evaluate_starts_with_zonemap(const VExprSPtrs& arguments) { |
1437 | 36 | auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments); |
1438 | 38 | if (!slot_literal.has_value() || slot_literal->literal_on_left) { |
1439 | 0 | return false; |
1440 | 0 | } |
1441 | 36 | return supports_starts_with_zonemap_slot_type(slot_literal->slot_type) && |
1442 | 37 | slot_literal->literal_type != nullptr && |
1443 | 37 | is_string_type(remove_nullable(slot_literal->literal_type)->get_primitive_type()); |
1444 | 36 | } |
1445 | | |
1446 | | } // namespace |
1447 | | |
// starts_with as a binary string function, extended with zone-map filtering: both overrides
// forward to the starts_with zone-map helpers defined in the anonymous namespace above.
class FunctionStringStartsWith : public FunctionBinaryToType<DataTypeString, DataTypeString,
                                                             StringStartsWithImpl, NameStartsWith> {
public:
    // Factory used when the function is registered.
    static FunctionPtr create() { return std::make_shared<FunctionStringStartsWith>(); }

    // Decides from the zone map whether rows may satisfy starts_with(slot, literal).
    ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx,
                                                const VExprSPtrs& arguments) const override {
        return evaluate_starts_with_zonemap(ctx, arguments);
    }

    // Reports whether the argument shape and types allow zone-map evaluation at all.
    bool can_evaluate_zonemap_filter(const VExprSPtrs& arguments) const override {
        return can_evaluate_starts_with_zonemap(arguments);
    }
};
1461 | | |
// Binary (string, string) functions built from FunctionBinaryToType.
using FunctionStringEndsWith =
        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
using FunctionStringInstr =
        FunctionBinaryToType<DataTypeString, DataTypeString, StringInStrImpl, NameInstr>;
using FunctionStringLocate =
        FunctionBinaryToType<DataTypeString, DataTypeString, StringLocateImpl, NameLocate>;
using FunctionStringFindInSet =
        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;

// String -> string functions built from FunctionStringToString.
using FunctionQuote = FunctionStringToString<NameQuoteImpl, NameQuote>;

using FunctionToLower = FunctionStringToString<TransferImpl<NameToLower>, NameToLower>;

using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameToUpper>;

using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;

// Hex / base64 conversions. The boolean template argument of FunctionStringEncode is false
// for the UnHexImplEmpty and to_base64 variants and true for the UnHexImplNull variant.
using FunctionUnHex = FunctionStringEncode<UnHexImpl<UnHexImplEmpty>, false>;
using FunctionUnHexNullable = FunctionStringEncode<UnHexImpl<UnHexImplNull>, true>;
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>;
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;

using FunctionStringAppendTrailingCharIfAbsent =
        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;

// lpad / rpad share FunctionStringPad and differ only in the policy tag.
using FunctionStringLPad = FunctionStringPad<StringLPad>;
using FunctionStringRPad = FunctionStringPad<StringRPad>;
1489 | | |
// Declarations of the per-category registration routines that register_function_string()
// calls before registering the functions declared in this file.
extern void register_function_string_basic(SimpleFunctionFactory& factory);
extern void register_function_string_digest(SimpleFunctionFactory& factory);
extern void register_function_string_mask(SimpleFunctionFactory& factory);
extern void register_function_string_misc(SimpleFunctionFactory& factory);
extern void register_function_string_search(SimpleFunctionFactory& factory);
extern void register_function_string_url(SimpleFunctionFactory& factory);
1496 | | |
// Registers the string functions with `factory`: first the per-category registration
// routines, then the function types declared in this file, and finally the aliases.
void register_function_string(SimpleFunctionFactory& factory) {
    // Per-category registration routines.
    register_function_string_basic(factory);
    register_function_string_digest(factory);
    register_function_string_mask(factory);
    register_function_string_misc(factory);
    register_function_string_search(factory);
    register_function_string_url(factory);

    // Unary and binary string functions.
    factory.register_function<FunctionStringParseDataSize>();
    factory.register_function<FunctionStringASCII>();
    factory.register_function<FunctionStringLength>();
    factory.register_function<FunctionCrc32>();
    factory.register_function<FunctionStringUTF8Length>();
    factory.register_function<FunctionStringSpace>();
    factory.register_function<FunctionStringStartsWith>();
    factory.register_function<FunctionStringEndsWith>();
    factory.register_function<FunctionStringInstr>();
    factory.register_function<FunctionStringFindInSet>();
    factory.register_function<FunctionStringLocate>();
    factory.register_function<FunctionQuote>();
    factory.register_function<FunctionReverseCommon>();
    factory.register_function<FunctionUnHex>();
    factory.register_function<FunctionUnHexNullable>();
    factory.register_function<FunctionToLower>();
    factory.register_function<FunctionToUpper>();
    factory.register_function<FunctionToInitcap>();
    // Trim family: every name tag is registered with both Trim1Impl and Trim2Impl.
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrim>>>();
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrim>>>();
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrim>>>();
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrim>>>();
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrim>>>();
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrim>>>();
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrimIn>>>();
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrimIn>>>();
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrimIn>>>();
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrimIn>>>();
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrimIn>>>();
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrimIn>>>();
    // Concatenation, padding and base64.
    factory.register_function<FunctionStringConcat>();
    factory.register_function<FunctionStringElt>();
    factory.register_function<FunctionStringConcatWs>();
    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
    factory.register_function<FunctionStringRepeat>();
    factory.register_function<FunctionStringLPad>();
    factory.register_function<FunctionStringRPad>();
    factory.register_function<FunctionToBase64>();
    factory.register_function<FunctionFromBase64>();
    // FunctionMoneyFormat: one instantiation per implementation type.
    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMALV2>>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL32>>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL64>>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL128I>>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL256>>>();
    // FunctionStringFormatRound: one instantiation per implementation type.
    factory.register_function<FunctionStringFormatRound<FormatRoundDoubleImpl>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundInt64Impl>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundInt128Impl>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMALV2>>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL32>>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL64>>>();
    factory.register_function<
            FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL128I>>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL256>>>();
    // Replace / overlay family and UTF-8 validation.
    factory.register_function<FunctionReplace<ReplaceImpl, true>>();
    factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
    factory.register_function<FunctionOverlay>();
    factory.register_function<FunctionIsValidUTF8>();

    // Alias registrations, performed after the functions above have been registered.
    factory.register_alias(FunctionIsValidUTF8::name, "isValidUTF8");
    factory.register_alias(FunctionToLower::name, "lcase");
    factory.register_alias(FunctionToUpper::name, "ucase");
    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
    factory.register_alias(FunctionStringLength::name, "octet_length");
    factory.register_alias(FunctionOverlay::name, "insert");
}
1575 | | |
1576 | | } // namespace doris |