be/src/exprs/function/function_string.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <glog/logging.h> |
21 | | #include <sys/types.h> |
22 | | |
23 | | #include <algorithm> |
24 | | #include <array> |
25 | | #include <boost/iterator/iterator_facade.hpp> |
26 | | #include <boost/locale.hpp> |
27 | | #include <climits> |
28 | | #include <cmath> |
29 | | #include <cstddef> |
30 | | #include <cstdlib> |
31 | | #include <cstring> |
32 | | #include <iomanip> |
33 | | #include <memory> |
34 | | #include <ostream> |
35 | | #include <random> |
36 | | #include <sstream> |
37 | | #include <tuple> |
38 | | #include <type_traits> |
39 | | #include <unordered_map> |
40 | | #include <utility> |
41 | | #include <variant> |
42 | | #include <vector> |
43 | | |
44 | | #include "common/compiler_util.h" // IWYU pragma: keep |
45 | | #include "common/exception.h" |
46 | | #include "common/status.h" |
47 | | #include "core/block/block.h" |
48 | | #include "core/block/column_numbers.h" |
49 | | #include "core/block/column_with_type_and_name.h" |
50 | | #include "core/column/column.h" |
51 | | #include "core/column/column_const.h" |
52 | | #include "core/column/column_varbinary.h" |
53 | | #include "core/column/column_vector.h" |
54 | | #include "core/data_type/data_type.h" |
55 | | #include "core/data_type/define_primitive_type.h" |
56 | | #include "core/data_type/primitive_type.h" |
57 | | #include "core/memcmp_small.h" |
58 | | #include "core/memcpy_small.h" |
59 | | #include "core/pod_array.h" |
60 | | #include "core/pod_array_fwd.h" |
61 | | #include "core/types.h" |
62 | | #include "core/value/decimalv2_value.h" |
63 | | #include "exec/common/hash_table/phmap_fwd_decl.h" |
64 | | #include "exec/common/int_exp.h" |
65 | | #include "exec/common/template_helpers.hpp" |
66 | | #include "exprs/aggregate/aggregate_function.h" |
67 | | #include "exprs/function/function_needs_to_handle_null.h" |
68 | | #include "util/raw_value.h" |
69 | | #include "util/sha.h" |
70 | | #include "util/string_search.hpp" |
71 | | #include "util/string_util.h" |
72 | | #include "util/utf8_check.h" |
73 | | |
74 | | #ifndef USE_LIBCPP |
75 | | #include <memory_resource> |
76 | | #define PMR std::pmr |
77 | | #else |
78 | | #include <boost/container/pmr/monotonic_buffer_resource.hpp> |
79 | | #include <boost/container/pmr/vector.hpp> |
80 | | #define PMR boost::container::pmr |
81 | | #endif |
82 | | |
83 | | #include <fmt/format.h> |
84 | | #include <unicode/normalizer2.h> |
85 | | #include <unicode/stringpiece.h> |
86 | | #include <unicode/unistr.h> |
87 | | |
88 | | #include <cstdint> |
89 | | #include <string> |
90 | | #include <string_view> |
91 | | |
92 | | #include "core/assert_cast.h" |
93 | | #include "core/column/column_array.h" |
94 | | #include "core/column/column_decimal.h" |
95 | | #include "core/column/column_nullable.h" |
96 | | #include "core/column/column_string.h" |
97 | | #include "core/data_type/data_type_array.h" |
98 | | #include "core/data_type/data_type_decimal.h" |
99 | | #include "core/data_type/data_type_nullable.h" |
100 | | #include "core/data_type/data_type_number.h" |
101 | | #include "core/data_type/data_type_string.h" |
102 | | #include "core/string_ref.h" |
103 | | #include "exec/common/pinyin.h" |
104 | | #include "exec/common/stringop_substring.h" |
105 | | #include "exec/common/util.hpp" |
106 | | #include "exprs/function/function.h" |
107 | | #include "exprs/function/function_helpers.h" |
108 | | #include "exprs/function_context.h" |
109 | | #include "exprs/math_functions.h" |
110 | | #include "pugixml.hpp" |
111 | | #include "util/md5.h" |
112 | | #include "util/simd/vstring_function.h" |
113 | | #include "util/sm3.h" |
114 | | #include "util/url_coding.h" |
115 | | #include "util/url_parser.h" |
116 | | |
117 | | namespace doris { |
118 | | #include "common/compile_check_avoid_begin.h" |
119 | | class FunctionStrcmp : public IFunction { |
120 | | public: |
121 | | static constexpr auto name = "strcmp"; |
122 | | |
123 | 8 | static FunctionPtr create() { return std::make_shared<FunctionStrcmp>(); } |
124 | | |
125 | 1 | String get_name() const override { return name; } |
126 | | |
127 | 0 | size_t get_number_of_arguments() const override { return 2; } |
128 | | |
129 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
130 | 0 | return std::make_shared<DataTypeInt8>(); |
131 | 0 | } |
132 | | |
133 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
134 | 0 | uint32_t result, size_t input_rows_count) const override { |
135 | 0 | const auto& [arg0_column, arg0_const] = |
136 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
137 | 0 | const auto& [arg1_column, arg1_const] = |
138 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
139 | |
|
140 | 0 | auto result_column = ColumnInt8::create(input_rows_count); |
141 | |
|
142 | 0 | if (auto arg0 = check_and_get_column<ColumnString>(arg0_column.get())) { |
143 | 0 | if (auto arg1 = check_and_get_column<ColumnString>(arg1_column.get())) { |
144 | 0 | if (arg0_const) { |
145 | 0 | scalar_vector(arg0->get_data_at(0), *arg1, *result_column); |
146 | 0 | } else if (arg1_const) { |
147 | 0 | vector_scalar(*arg0, arg1->get_data_at(0), *result_column); |
148 | 0 | } else { |
149 | 0 | vector_vector(*arg0, *arg1, *result_column); |
150 | 0 | } |
151 | 0 | } |
152 | 0 | } |
153 | |
|
154 | 0 | block.replace_by_position(result, std::move(result_column)); |
155 | 0 | return Status::OK(); |
156 | 0 | } |
157 | | |
158 | | private: |
159 | 0 | static void scalar_vector(const StringRef str, const ColumnString& vec1, ColumnInt8& res) { |
160 | 0 | size_t size = vec1.size(); |
161 | 0 | for (size_t i = 0; i < size; ++i) { |
162 | 0 | res.get_data()[i] = str.compare(vec1.get_data_at(i)); |
163 | 0 | } |
164 | 0 | } |
165 | | |
166 | 0 | static void vector_scalar(const ColumnString& vec0, const StringRef str, ColumnInt8& res) { |
167 | 0 | size_t size = vec0.size(); |
168 | 0 | for (size_t i = 0; i < size; ++i) { |
169 | 0 | res.get_data()[i] = vec0.get_data_at(i).compare(str); |
170 | 0 | } |
171 | 0 | } |
172 | | |
173 | 0 | static void vector_vector(const ColumnString& vec0, const ColumnString& vec1, ColumnInt8& res) { |
174 | 0 | size_t size = vec0.size(); |
175 | 0 | for (size_t i = 0; i < size; ++i) { |
176 | 0 | res.get_data()[i] = vec0.get_data_at(i).compare(vec1.get_data_at(i)); |
177 | 0 | } |
178 | 0 | } |
179 | | }; |
180 | | |
/// SQL function `auto_partition_name(type, ...)`: builds a partition name string.
///
///  - `type` starting with "list": `auto_partition_name('list', v1, v2, ...)`.
///    The name is 'p' followed by one encoded segment per value (see
///    _auto_partition_type_of_list).
///  - any other `type` is treated as range:
///    `auto_partition_name('range', unit, 'yyyy-mm-dd[ hh:mm:ss]')`.
///    The name is always 15 bytes: 'p' + the date truncated to `unit` + '0' padding.
///
/// The partition type and the range unit are read from the first row only.
class FunctionAutoPartitionName : public IFunction {
public:
    static constexpr auto name = "auto_partition_name";
    static FunctionPtr create() { return std::make_shared<FunctionAutoPartitionName>(); }
    String get_name() const override { return name; }
    size_t get_number_of_arguments() const override { return 0; }
    bool is_variadic() const override { return true; }
    bool use_default_implementation_for_nulls() const override { return false; }
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return std::make_shared<DataTypeString>();
    }

    /// Unwraps every argument (const -> full column, nullable -> nested + null map)
    /// and dispatches to the list or range implementation.
    ///
    /// Returns InvalidArgument when no argument is given, when the range form has
    /// fewer than three arguments, or when the implementation rejects the input.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        const size_t argument_size = arguments.size();
        if (argument_size == 0) {
            return Status::InvalidArgument("function {} requires the partition type argument",
                                           name);
        }

        // All-zero null map shared by every argument that is not nullable.
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
        std::vector<const ColumnString::Chars*> chars_list(argument_size);
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
        std::vector<bool> is_const_args(argument_size);
        std::vector<const ColumnUInt8::Container*> null_list(argument_size);
        // Holds a reference to each null-map column so the pointers stored in
        // `null_list` stay valid after `argument_columns[i]` is re-pointed to the
        // nested column.
        std::vector<ColumnPtr> argument_null_columns(argument_size);
        std::vector<ColumnPtr> argument_columns(argument_size);

        for (size_t i = 0; i < argument_size; ++i) {
            argument_columns[i] =
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
            if (const auto* nullable =
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
                null_list[i] = &nullable->get_null_map_data();
                argument_null_columns[i] = nullable->get_null_map_column_ptr();
                argument_columns[i] = nullable->get_nested_column_ptr();
            } else {
                null_list[i] = &const_null_map->get_data();
            }

            const auto& [col, is_const] =
                    unpack_if_const(block.get_by_position(arguments[i]).column);

            const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
            chars_list[i] = &col_str->get_chars();
            offsets_list[i] = &col_str->get_offsets();
            is_const_args[i] = is_const;
        }

        auto res = ColumnString::create();
        auto& res_data = res->get_chars();
        auto& res_offset = res->get_offsets();
        res_offset.resize(input_rows_count);

        // Nothing to name; also keeps the first-row reads below in bounds.
        if (input_rows_count == 0) {
            block.get_by_position(result).column = std::move(res);
            return Status::OK();
        }

        // partition type is list|range
        const std::string_view partition_type = _first_row_view(*chars_list[0], *offsets_list[0]);
        if (partition_type.starts_with("list")) {
            return _auto_partition_type_of_list(chars_list, offsets_list, is_const_args, null_list,
                                                res_data, res_offset, input_rows_count,
                                                argument_size, block, result, res);
        }

        // The range form reads argument 1 (unit) and argument 2 (date string).
        if (argument_size < 3) {
            return Status::InvalidArgument(
                    "function {} requires a unit and a date argument for range partition", name);
        }
        return _auto_partition_type_of_range(chars_list, offsets_list, is_const_args, res_data,
                                             res_offset, input_rows_count, argument_size, block,
                                             result, res);
    }

private:
    /// How a range unit shapes the generated name.
    struct RangeUnit {
        std::string_view prefix; ///< unit keyword, matched as a prefix of the argument
        size_t parts;            ///< number of leading date components copied verbatim
        std::string_view filler; ///< digits appended right after the copied components
    };

    // raw    => 2022-12-12 11:30:20
    // year   => 2022 01 01 00 00 00
    // month  => 2022 12 01 00 00 00
    // day    => 2022 12 12 00 00 00
    // hour   => 2022 12 12 11 00 00
    // minute => 2022 12 12 11 30 00
    // second => 2022 12 12 11 30 20
    static constexpr std::array<RangeUnit, 6> RANGE_UNITS = {{{"year", 1, "0101"},
                                                              {"month", 2, "01"},
                                                              {"day", 3, ""},
                                                              {"hour", 4, ""},
                                                              {"minute", 5, ""},
                                                              {"second", 6, ""}}};

    /// Length in bytes of every range partition name: 'p' + 14 digits.
    static constexpr size_t RANGE_NAME_LEN = 15;

    /// View over the first row of a string column. The column must hold at least one row.
    static std::string_view _first_row_view(const ColumnString::Chars& chars,
                                            const ColumnString::Offsets& offsets) {
        return {chars.raw_data(), static_cast<size_t>(offsets[0])};
    }

    /// Converts UTF-8 text to UTF-16 code units.
    std::u16string _string_to_u16string(const std::string& str) const {
        return boost::locale::conv::utf_to_utf<char16_t>(str);
    }

    /// Encodes a UTF-16 string into the characters used inside a partition name.
    ///
    /// ASCII letters and digits are kept; every other code unit is replaced by the
    /// lower-case hex of the value returned by _get_code_point_at(). A leading '-'
    /// additionally emits a '_' prefix before its own hex encoding.
    ///
    /// NOTE(review): the loop advances one code unit at a time, so the low half of a
    /// surrogate pair is emitted a second time on its own. Left unchanged because
    /// altering it would change the generated names - confirm against the name
    /// generator on the planner side before touching it.
    std::string _string_to_unicode(const std::u16string& s) const {
        std::string res_s;
        res_s.reserve(s.size());
        if (!s.empty() && s[0] == '-') {
            res_s += '_';
        }
        for (size_t i = 0; i < s.length(); ++i) {
            const char16_t ch = s[i];
            const bool is_alnum = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
                                  (ch >= '0' && ch <= '9');
            if (is_alnum) {
                res_s += ch;
            } else {
                const int unicodeValue = _get_code_point_at(s, i);
                res_s += fmt::format("{:02x}", static_cast<uint32_t>(unicodeValue));
            }
        }
        return res_s;
    }

    /// Returns the code point starting at `index`: the combined value when a high
    /// surrogate is followed by a low surrogate, otherwise the code unit itself.
    int _get_code_point_at(const std::u16string& str, std::size_t index) const {
        const char16_t first = str[index];
        // [0xD800,0xDBFF] is the scope of the first code unit
        if ((first >= 0xD800 && first <= 0xDBFF) && (index + 1 < str.size())) {
            const char16_t second = str[index + 1];
            // [0xDC00,0xDFFF] is the scope of the second code unit
            if (second >= 0xDC00 && second <= 0xDFFF) {
                return ((first - 0xD800) << 10) + (second - 0xDC00) + 0x10000;
            }
        }

        return first;
    }

    /// Builds list partition names.
    ///
    /// Each row yields 'p' followed by one segment per argument after the type:
    /// 'X' for NULL, otherwise the encoded value followed by its UTF-16 length.
    /// Names longer than 50 bytes are cut to 50 bytes and suffixed with
    /// '_' + the hex of to_hash_code() over the full name.
    Status _auto_partition_type_of_list(std::vector<const ColumnString::Chars*>& chars_list,
                                        std::vector<const ColumnString::Offsets*>& offsets_list,
                                        std::vector<bool>& is_const_args,
                                        const std::vector<const ColumnUInt8::Container*>& null_list,
                                        auto& res_data, auto& res_offset, size_t input_rows_count,
                                        size_t argument_size, Block& block, uint32_t result,
                                        auto& res) const {
        size_t total_len = 0;
        const auto rows = static_cast<ssize_t>(input_rows_count);
        // Signed row index: `res_offset[row - 1]` is read with row == 0 below.
        for (ssize_t row = 0; row < rows; ++row) {
            std::string res_p;
            res_p.reserve(argument_size * 5);
            res_p += 'p';
            for (size_t col = 1; col < argument_size; ++col) {
                const auto& current_offsets = *offsets_list[col];
                const auto& current_chars = *chars_list[col];
                const auto& current_nullmap = *null_list[col];

                if (current_nullmap[row]) {
                    res_p += 'X';
                    continue;
                }

                auto idx = index_check_const(row, is_const_args[col]);
                const int size = current_offsets[idx] - current_offsets[idx - 1];
                const char* raw_chars =
                        reinterpret_cast<const char*>(&current_chars[current_offsets[idx - 1]]);
                // convert string to u16string in order to convert to unicode strings
                const std::string raw_str(raw_chars, size);
                const auto u16string = _string_to_u16string(raw_str);
                res_p += _string_to_unicode(u16string) + std::to_string(u16string.size());
            }

            // check the name of length
            if (res_p.size() > 50) {
                res_p = std::format("{}_{:08x}", res_p.substr(0, 50), to_hash_code(res_p));
            }
            const size_t len = res_p.size();
            total_len += len;
            res_data.resize(total_len);
            memcpy(&res_data[res_offset[row - 1]], res_p.c_str(), len);
            res_offset[row] = res_offset[row - 1] + len;
        }
        block.get_by_position(result).column = std::move(res);
        return Status::OK();
    }

    /// Builds range partition names from argument 1 (unit) and argument 2 (date text).
    ///
    /// Returns InvalidArgument when the unit is not one of RANGE_UNITS or when a
    /// value does not match `yyyy-mm-dd` / `yyyy-mm-dd hh:mm:ss`. Previously an
    /// unknown unit overwrote the leading 'p' and produced a name of fifteen '0's.
    Status _auto_partition_type_of_range(std::vector<const ColumnString::Chars*>& chars_list,
                                         std::vector<const ColumnString::Offsets*>& offsets_list,
                                         std::vector<bool>& is_const_args, auto& res_data,
                                         auto& res_offset, size_t input_rows_count,
                                         size_t argument_size, Block& block, uint32_t result,
                                         auto& res) const {
        // The unit is taken from the first row and applied to every row.
        const std::string_view range_type = _first_row_view(*chars_list[1], *offsets_list[1]);
        const auto* unit = std::find_if(
                RANGE_UNITS.begin(), RANGE_UNITS.end(),
                [&](const RangeUnit& candidate) { return range_type.starts_with(candidate.prefix); });
        if (unit == RANGE_UNITS.end()) {
            return Status::InvalidArgument(
                    "The range partition only support unit year|month|day|hour|minute|second, "
                    "but got {}",
                    std::string(range_type));
        }

        // Compiled once; building the pattern per row is needless work.
        const RE2 date_regex(R"(^\d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d{2})?$)");

        const auto& current_offsets = *offsets_list[2];
        const auto& current_chars = *chars_list[2];

        res_data.resize(RANGE_NAME_LEN * input_rows_count);
        const auto rows = static_cast<ssize_t>(input_rows_count);
        // Signed row index: `res_offset[i - 1]` is read with i == 0 below.
        for (ssize_t i = 0; i < rows; ++i) {
            auto idx = index_check_const(i, is_const_args[2]);
            const int size = current_offsets[idx] - current_offsets[idx - 1];
            const char* tmp =
                    reinterpret_cast<const char*>(&current_chars[current_offsets[idx - 1]]);
            const std::string date_text(tmp, size);

            // check the str if it is date|datetime
            if (!RE2::FullMatch(date_text, date_regex)) {
                return Status::InvalidArgument("The range partition only support DATE|DATETIME");
            }

            // Start from "p00000000000000" and overwrite the leading digits.
            auto* dst = &res_data[res_offset[i - 1]];
            std::fill_n(dst, RANGE_NAME_LEN, '0');
            dst[0] = 'p';

            // Components of "yyyy-mm-dd hh:mm:ss" start at 0, 5, 8, 11, 14, 17.
            size_t written = 1;
            for (size_t part = 0; part < unit->parts; ++part) {
                const size_t src_pos = part == 0 ? 0 : 2 + 3 * part;
                const size_t part_len = part == 0 ? 4 : 2;
                if (src_pos >= date_text.size()) {
                    break; // DATE input carries no time components
                }
                memcpy(dst + written, date_text.data() + src_pos, part_len);
                written += part_len;
            }
            memcpy(dst + written, unit->filler.data(), unit->filler.size());

            res_offset[i] = res_offset[i - 1] + RANGE_NAME_LEN;
        }
        block.get_by_position(result).column = std::move(res);
        return Status::OK();
    }

    /// Polynomial hash (multiplier 31) over the bytes of `str`, kept to 32 bits and
    /// reinterpreted as a signed value.
    int32_t to_hash_code(const std::string& str) const {
        uint64_t h = 0;
        for (uint8_t c : str) {
            h = (h * 31U + c) & 0xFFFFFFFFU;
        }
        return static_cast<int32_t>(h);
    }
};
418 | | |
419 | | template <typename Impl> |
420 | | class FunctionSubstring : public IFunction { |
421 | | public: |
422 | | static constexpr auto name = SubstringUtil::name; |
423 | 2 | String get_name() const override { return name; }_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE8get_nameB5cxx11Ev Line | Count | Source | 423 | 1 | String get_name() const override { return name; } |
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE8get_nameB5cxx11Ev Line | Count | Source | 423 | 1 | String get_name() const override { return name; } |
|
424 | 2.27k | static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }_ZN5doris17FunctionSubstringINS_11Substr3ImplEE6createEv Line | Count | Source | 424 | 2.18k | static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); } |
_ZN5doris17FunctionSubstringINS_11Substr2ImplEE6createEv Line | Count | Source | 424 | 93 | static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); } |
|
425 | | |
426 | 2.26k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
427 | 2.26k | return std::make_shared<DataTypeString>(); |
428 | 2.26k | } _ZNK5doris17FunctionSubstringINS_11Substr3ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 426 | 2.17k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 427 | 2.17k | return std::make_shared<DataTypeString>(); | 428 | 2.17k | } |
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 426 | 85 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 427 | 85 | return std::make_shared<DataTypeString>(); | 428 | 85 | } |
|
429 | 2.27k | DataTypes get_variadic_argument_types_impl() const override { |
430 | 2.27k | return Impl::get_variadic_argument_types(); |
431 | 2.27k | } _ZNK5doris17FunctionSubstringINS_11Substr3ImplEE32get_variadic_argument_types_implEv Line | Count | Source | 429 | 2.18k | DataTypes get_variadic_argument_types_impl() const override { | 430 | 2.18k | return Impl::get_variadic_argument_types(); | 431 | 2.18k | } |
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE32get_variadic_argument_types_implEv Line | Count | Source | 429 | 92 | DataTypes get_variadic_argument_types_impl() const override { | 430 | 92 | return Impl::get_variadic_argument_types(); | 431 | 92 | } |
|
432 | 2.26k | size_t get_number_of_arguments() const override { |
433 | 2.26k | return get_variadic_argument_types_impl().size(); |
434 | 2.26k | } _ZNK5doris17FunctionSubstringINS_11Substr3ImplEE23get_number_of_argumentsEv Line | Count | Source | 432 | 2.17k | size_t get_number_of_arguments() const override { | 433 | 2.17k | return get_variadic_argument_types_impl().size(); | 434 | 2.17k | } |
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE23get_number_of_argumentsEv Line | Count | Source | 432 | 85 | size_t get_number_of_arguments() const override { | 433 | 85 | return get_variadic_argument_types_impl().size(); | 434 | 85 | } |
|
435 | | |
436 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
437 | 1.84k | uint32_t result, size_t input_rows_count) const override { |
438 | 1.84k | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
439 | 1.84k | } _ZNK5doris17FunctionSubstringINS_11Substr3ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 437 | 1.78k | uint32_t result, size_t input_rows_count) const override { | 438 | 1.78k | return Impl::execute_impl(context, block, arguments, result, input_rows_count); | 439 | 1.78k | } |
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 437 | 55 | uint32_t result, size_t input_rows_count) const override { | 438 | 55 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); | 439 | 55 | } |
|
440 | | }; |
441 | | |
442 | | struct Substr3Impl { |
443 | 2.18k | static DataTypes get_variadic_argument_types() { |
444 | 2.18k | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>(), |
445 | 2.18k | std::make_shared<DataTypeInt32>()}; |
446 | 2.18k | } |
447 | | |
448 | | static Status execute_impl(FunctionContext* context, Block& block, |
449 | | const ColumnNumbers& arguments, uint32_t result, |
450 | 1.78k | size_t input_rows_count) { |
451 | 1.78k | SubstringUtil::substring_execute(block, arguments, result, input_rows_count); |
452 | 1.78k | return Status::OK(); |
453 | 1.78k | } |
454 | | }; |
455 | | |
456 | | struct Substr2Impl { |
457 | 92 | static DataTypes get_variadic_argument_types() { |
458 | 92 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>()}; |
459 | 92 | } |
460 | | |
461 | | static Status execute_impl(FunctionContext* context, Block& block, |
462 | | const ColumnNumbers& arguments, uint32_t result, |
463 | 55 | size_t input_rows_count) { |
464 | 55 | auto col_len = ColumnInt32::create(input_rows_count); |
465 | 55 | auto& strlen_data = col_len->get_data(); |
466 | | |
467 | 55 | ColumnPtr str_col; |
468 | 55 | bool str_const; |
469 | 55 | std::tie(str_col, str_const) = unpack_if_const(block.get_by_position(arguments[0]).column); |
470 | | |
471 | 55 | const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets(); |
472 | | |
473 | 55 | if (str_const) { |
474 | 18 | std::fill(strlen_data.begin(), strlen_data.end(), str_offset[0] - str_offset[-1]); |
475 | 37 | } else { |
476 | 101 | for (int i = 0; i < input_rows_count; ++i) { |
477 | 64 | strlen_data[i] = str_offset[i] - str_offset[i - 1]; |
478 | 64 | } |
479 | 37 | } |
480 | | |
481 | | // we complete the column2(strlen) with the default value - each row's strlen. |
482 | 55 | block.insert({std::move(col_len), std::make_shared<DataTypeInt32>(), "strlen"}); |
483 | 55 | ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1}; |
484 | | |
485 | 55 | SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count); |
486 | 55 | return Status::OK(); |
487 | 55 | } |
488 | | }; |
489 | | |
490 | | template <bool Reverse> |
491 | | class FunctionMaskPartial; |
492 | | |
493 | | class FunctionMask : public IFunction { |
494 | | public: |
495 | | static constexpr auto name = "mask"; |
496 | | static constexpr unsigned char DEFAULT_UPPER_MASK = 'X'; |
497 | | static constexpr unsigned char DEFAULT_LOWER_MASK = 'x'; |
498 | | static constexpr unsigned char DEFAULT_NUMBER_MASK = 'n'; |
499 | 0 | String get_name() const override { return name; } |
500 | 8 | static FunctionPtr create() { return std::make_shared<FunctionMask>(); } |
501 | | |
502 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
503 | 0 | return std::make_shared<DataTypeString>(); |
504 | 0 | } |
505 | | |
506 | 0 | size_t get_number_of_arguments() const override { return 0; } |
507 | | |
508 | 0 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1, 2, 3}; } |
509 | | |
510 | 1 | bool is_variadic() const override { return true; } |
511 | | |
512 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
513 | 0 | uint32_t result, size_t input_rows_count) const override { |
514 | 0 | DCHECK_GE(arguments.size(), 1); |
515 | 0 | DCHECK_LE(arguments.size(), 4); |
516 | |
|
517 | 0 | char upper = DEFAULT_UPPER_MASK, lower = DEFAULT_LOWER_MASK, number = DEFAULT_NUMBER_MASK; |
518 | |
|
519 | 0 | auto res = ColumnString::create(); |
520 | 0 | const auto& source_column = |
521 | 0 | assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column); |
522 | |
|
523 | 0 | if (arguments.size() > 1) { |
524 | 0 | const auto& col = *block.get_by_position(arguments[1]).column; |
525 | 0 | auto string_ref = col.get_data_at(0); |
526 | 0 | if (string_ref.size > 0) { |
527 | 0 | upper = *string_ref.data; |
528 | 0 | } |
529 | 0 | } |
530 | |
|
531 | 0 | if (arguments.size() > 2) { |
532 | 0 | const auto& col = *block.get_by_position(arguments[2]).column; |
533 | 0 | auto string_ref = col.get_data_at(0); |
534 | 0 | if (string_ref.size > 0) { |
535 | 0 | lower = *string_ref.data; |
536 | 0 | } |
537 | 0 | } |
538 | |
|
539 | 0 | if (arguments.size() > 3) { |
540 | 0 | const auto& col = *block.get_by_position(arguments[3]).column; |
541 | 0 | auto string_ref = col.get_data_at(0); |
542 | 0 | if (string_ref.size > 0) { |
543 | 0 | number = *string_ref.data; |
544 | 0 | } |
545 | 0 | } |
546 | |
|
547 | 0 | if (arguments.size() > 4) { |
548 | 0 | return Status::InvalidArgument( |
549 | 0 | fmt::format("too many arguments for function {}", get_name())); |
550 | 0 | } |
551 | | |
552 | 0 | vector_mask(source_column, *res, upper, lower, number); |
553 | |
|
554 | 0 | block.get_by_position(result).column = std::move(res); |
555 | |
|
556 | 0 | return Status::OK(); |
557 | 0 | } |
558 | | friend class FunctionMaskPartial<true>; |
559 | | friend class FunctionMaskPartial<false>; |
560 | | |
561 | | private: |
562 | | static void vector_mask(const ColumnString& source, ColumnString& result, const char upper, |
563 | 0 | const char lower, const char number) { |
564 | 0 | result.get_chars().resize(source.get_chars().size()); |
565 | 0 | result.get_offsets().resize(source.get_offsets().size()); |
566 | 0 | memcpy_small_allow_read_write_overflow15( |
567 | 0 | result.get_offsets().data(), source.get_offsets().data(), |
568 | 0 | source.get_offsets().size() * sizeof(ColumnString::Offset)); |
569 | |
|
570 | 0 | const unsigned char* src = source.get_chars().data(); |
571 | 0 | const size_t size = source.get_chars().size(); |
572 | 0 | unsigned char* res = result.get_chars().data(); |
573 | 0 | mask(src, size, upper, lower, number, res); |
574 | 0 | } |
575 | | |
576 | | static void mask(const unsigned char* __restrict src, const size_t size, |
577 | | const unsigned char upper, const unsigned char lower, |
578 | 0 | const unsigned char number, unsigned char* __restrict res) { |
579 | 0 | for (size_t i = 0; i != size; ++i) { |
580 | 0 | auto c = src[i]; |
581 | 0 | if (c >= 'A' && c <= 'Z') { |
582 | 0 | res[i] = upper; |
583 | 0 | } else if (c >= 'a' && c <= 'z') { |
584 | 0 | res[i] = lower; |
585 | 0 | } else if (c >= '0' && c <= '9') { |
586 | 0 | res[i] = number; |
587 | 0 | } else { |
588 | 0 | res[i] = c; |
589 | 0 | } |
590 | 0 | } |
591 | 0 | } |
592 | | }; |
593 | | |
594 | | template <bool Reverse> |
595 | | class FunctionMaskPartial : public IFunction { |
596 | | public: |
597 | | static constexpr auto name = Reverse ? "mask_last_n" : "mask_first_n"; |
598 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE8get_nameB5cxx11Ev |
599 | 16 | static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); }_ZN5doris19FunctionMaskPartialILb1EE6createEv Line | Count | Source | 599 | 8 | static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); } |
_ZN5doris19FunctionMaskPartialILb0EE6createEv Line | Count | Source | 599 | 8 | static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); } |
|
600 | | |
601 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
602 | 0 | return std::make_shared<DataTypeString>(); |
603 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE |
604 | | |
605 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE23get_number_of_argumentsEv |
606 | | |
607 | 2 | bool is_variadic() const override { return true; }_ZNK5doris19FunctionMaskPartialILb1EE11is_variadicEv Line | Count | Source | 607 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris19FunctionMaskPartialILb0EE11is_variadicEv Line | Count | Source | 607 | 1 | bool is_variadic() const override { return true; } |
|
608 | | |
609 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
610 | 0 | uint32_t result, size_t input_rows_count) const override { |
611 | 0 | auto res = ColumnString::create(); |
612 | 0 | auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
613 | 0 | const auto& source_column = assert_cast<const ColumnString&>(*col); |
614 | |
|
615 | 0 | if (arguments.size() == 1) { // no 2nd arg, just mask all |
616 | 0 | FunctionMask::vector_mask(source_column, *res, FunctionMask::DEFAULT_UPPER_MASK, |
617 | 0 | FunctionMask::DEFAULT_LOWER_MASK, |
618 | 0 | FunctionMask::DEFAULT_NUMBER_MASK); |
619 | 0 | } else { |
620 | 0 | const auto& [col_2nd, is_const] = |
621 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
622 | |
|
623 | 0 | const auto& col_n = assert_cast<const ColumnInt32&>(*col_2nd); |
624 | |
|
625 | 0 | if (is_const) { |
626 | 0 | RETURN_IF_ERROR(vector<true>(source_column, col_n, *res)); |
627 | 0 | } else { |
628 | 0 | RETURN_IF_ERROR(vector<false>(source_column, col_n, *res)); |
629 | 0 | } |
630 | 0 | } |
631 | | |
632 | 0 | block.get_by_position(result).column = std::move(res); |
633 | |
|
634 | 0 | return Status::OK(); |
635 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
636 | | |
637 | | private: |
638 | | template <bool is_const> |
639 | 0 | static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) { |
640 | 0 | const auto num_rows = src.size(); |
641 | 0 | const auto* chars = src.get_chars().data(); |
642 | 0 | const auto* offsets = src.get_offsets().data(); |
643 | 0 | result.get_chars().resize(src.get_chars().size()); |
644 | 0 | result.get_offsets().resize(src.get_offsets().size()); |
645 | 0 | memcpy_small_allow_read_write_overflow15( |
646 | 0 | result.get_offsets().data(), src.get_offsets().data(), |
647 | 0 | src.get_offsets().size() * sizeof(ColumnString::Offset)); |
648 | 0 | auto* res = result.get_chars().data(); |
649 | |
|
650 | 0 | const auto& col_n_data = col_n.get_data(); |
651 | |
|
652 | 0 | for (ssize_t i = 0; i != num_rows; ++i) { |
653 | 0 | auto offset = offsets[i - 1]; |
654 | 0 | int len = offsets[i] - offset; |
655 | 0 | const int n = col_n_data[index_check_const<is_const>(i)]; |
656 | |
|
657 | 0 | if (n < 0) [[unlikely]] { |
658 | 0 | return Status::InvalidArgument( |
659 | 0 | "function {} only accept non-negative input for 2nd argument but got {}", |
660 | 0 | name, n); |
661 | 0 | } |
662 | | |
663 | 0 | if constexpr (Reverse) { |
664 | 0 | auto start = std::max(len - n, 0); |
665 | 0 | if (start > 0) { |
666 | 0 | memcpy(&res[offset], &chars[offset], start); |
667 | 0 | } |
668 | 0 | offset += start; |
669 | 0 | } else { |
670 | 0 | if (n < len) { |
671 | 0 | memcpy(&res[offset + n], &chars[offset + n], len - n); |
672 | 0 | } |
673 | 0 | } |
674 | |
|
675 | 0 | len = std::min(n, len); |
676 | 0 | FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK, |
677 | 0 | FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK, |
678 | 0 | &res[offset]); |
679 | 0 | } |
680 | | |
681 | 0 | return Status::OK(); |
682 | 0 | } Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb1EE6vectorILb1EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_ Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb1EE6vectorILb0EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_ Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb0EE6vectorILb1EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_ Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb0EE6vectorILb0EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_ |
683 | | }; |
684 | | |
685 | | class FunctionLeft : public IFunction { |
686 | | public: |
687 | | static constexpr auto name = "left"; |
688 | 174 | static FunctionPtr create() { return std::make_shared<FunctionLeft>(); } |
689 | 1 | String get_name() const override { return name; } |
690 | 166 | size_t get_number_of_arguments() const override { return 2; } |
691 | 166 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
692 | 166 | return std::make_shared<DataTypeString>(); |
693 | 166 | } |
694 | | |
695 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
696 | 133 | uint32_t result, size_t input_rows_count) const override { |
697 | 133 | DCHECK_EQ(arguments.size(), 2); |
698 | 133 | auto res = ColumnString::create(); |
699 | 133 | bool col_const[2]; |
700 | 133 | ColumnPtr argument_columns[2]; |
701 | 399 | for (int i = 0; i < 2; ++i) { |
702 | 266 | std::tie(argument_columns[i], col_const[i]) = |
703 | 266 | unpack_if_const(block.get_by_position(arguments[i]).column); |
704 | 266 | } |
705 | | |
706 | 133 | const auto& str_col = assert_cast<const ColumnString&>(*argument_columns[0]); |
707 | 133 | const auto& len_col = assert_cast<const ColumnInt32&>(*argument_columns[1]); |
708 | 133 | const auto is_ascii = str_col.is_ascii(); |
709 | | |
710 | 133 | std::visit( |
711 | 133 | [&](auto is_ascii, auto str_const, auto len_const) { |
712 | 133 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, |
713 | 133 | input_rows_count); |
714 | 133 | }, _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 711 | 9 | [&](auto is_ascii, auto str_const, auto len_const) { | 712 | 9 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 713 | 9 | input_rows_count); | 714 | 9 | }, |
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Line | Count | Source | 711 | 8 | [&](auto is_ascii, auto str_const, auto len_const) { | 712 | 8 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 713 | 8 | input_rows_count); | 714 | 8 | }, |
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 711 | 8 | [&](auto is_ascii, auto str_const, auto len_const) { | 712 | 8 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 713 | 8 | input_rows_count); | 714 | 8 | }, |
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Line | Count | Source | 711 | 36 | [&](auto is_ascii, auto str_const, auto len_const) { | 712 | 36 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 713 | 36 | input_rows_count); | 714 | 36 | }, |
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Line | Count | Source | 711 | 36 | [&](auto is_ascii, auto str_const, auto len_const) { | 712 | 36 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 713 | 36 | input_rows_count); | 714 | 36 | }, |
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 711 | 36 | [&](auto is_ascii, auto str_const, auto len_const) { | 712 | 36 | _execute<is_ascii, str_const, len_const>(str_col, len_col, *res, | 713 | 36 | input_rows_count); | 714 | 36 | }, |
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ |
715 | 133 | make_bool_variant(is_ascii), make_bool_variant(col_const[0]), |
716 | 133 | make_bool_variant(col_const[1])); |
717 | | |
718 | 133 | block.get_by_position(result).column = std::move(res); |
719 | 133 | return Status::OK(); |
720 | 133 | } |
721 | | |
722 | | template <bool is_ascii, bool str_const, bool len_const> |
723 | | static void _execute(const ColumnString& str_col, const ColumnInt32& len_col, ColumnString& res, |
724 | 133 | size_t size) { |
725 | 133 | auto& res_chars = res.get_chars(); |
726 | 133 | auto& res_offsets = res.get_offsets(); |
727 | 133 | res_offsets.resize(size); |
728 | 133 | const auto& len_data = len_col.get_data(); |
729 | | |
730 | 133 | if constexpr (str_const) { |
731 | 44 | res_chars.reserve(size * (str_col.get_chars().size())); |
732 | 89 | } else { |
733 | 89 | res_chars.reserve(str_col.get_chars().size()); |
734 | 89 | } |
735 | | |
736 | 320 | for (int i = 0; i < size; ++i) { |
737 | 187 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); |
738 | 187 | int len = len_data[index_check_const<len_const>(i)]; |
739 | 187 | if (len <= 0 || str.empty()) { |
740 | 55 | StringOP::push_empty_string(i, res_chars, res_offsets); |
741 | 55 | continue; |
742 | 55 | } |
743 | | |
744 | 132 | const char* begin = str.begin(); |
745 | 132 | const char* p = begin; |
746 | | |
747 | 132 | if constexpr (is_ascii) { |
748 | 78 | p = begin + std::min(len, static_cast<int>(str.size)); |
749 | 78 | } else { |
750 | 54 | const char* end = str.end(); |
751 | 396 | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { |
752 | 342 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; |
753 | 342 | } |
754 | 54 | } |
755 | | |
756 | 132 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, |
757 | 132 | res_offsets); |
758 | 132 | } |
759 | 133 | } _ZN5doris12FunctionLeft8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 724 | 9 | size_t size) { | 725 | 9 | auto& res_chars = res.get_chars(); | 726 | 9 | auto& res_offsets = res.get_offsets(); | 727 | 9 | res_offsets.resize(size); | 728 | 9 | const auto& len_data = len_col.get_data(); | 729 | | | 730 | | if constexpr (str_const) { | 731 | | res_chars.reserve(size * (str_col.get_chars().size())); | 732 | 9 | } else { | 733 | 9 | res_chars.reserve(str_col.get_chars().size()); | 734 | 9 | } | 735 | | | 736 | 72 | for (int i = 0; i < size; ++i) { | 737 | 63 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 738 | 63 | int len = len_data[index_check_const<len_const>(i)]; | 739 | 63 | if (len <= 0 || str.empty()) { | 740 | 23 | StringOP::push_empty_string(i, res_chars, res_offsets); | 741 | 23 | continue; | 742 | 23 | } | 743 | | | 744 | 40 | const char* begin = str.begin(); | 745 | 40 | const char* p = begin; | 746 | | | 747 | | if constexpr (is_ascii) { | 748 | | p = begin + std::min(len, static_cast<int>(str.size)); | 749 | 40 | } else { | 750 | 40 | const char* end = str.end(); | 751 | 314 | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 752 | 274 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 753 | 274 | } | 754 | 40 | } | 755 | | | 756 | 40 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 757 | 40 | res_offsets); | 758 | 40 | } | 759 | 9 | } |
_ZN5doris12FunctionLeft8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 724 | 8 | size_t size) { | 725 | 8 | auto& res_chars = res.get_chars(); | 726 | 8 | auto& res_offsets = res.get_offsets(); | 727 | 8 | res_offsets.resize(size); | 728 | 8 | const auto& len_data = len_col.get_data(); | 729 | | | 730 | | if constexpr (str_const) { | 731 | | res_chars.reserve(size * (str_col.get_chars().size())); | 732 | 8 | } else { | 733 | 8 | res_chars.reserve(str_col.get_chars().size()); | 734 | 8 | } | 735 | | | 736 | 16 | for (int i = 0; i < size; ++i) { | 737 | 8 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 738 | 8 | int len = len_data[index_check_const<len_const>(i)]; | 739 | 8 | if (len <= 0 || str.empty()) { | 740 | 1 | StringOP::push_empty_string(i, res_chars, res_offsets); | 741 | 1 | continue; | 742 | 1 | } | 743 | | | 744 | 7 | const char* begin = str.begin(); | 745 | 7 | const char* p = begin; | 746 | | | 747 | | if constexpr (is_ascii) { | 748 | | p = begin + std::min(len, static_cast<int>(str.size)); | 749 | 7 | } else { | 750 | 7 | const char* end = str.end(); | 751 | 41 | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 752 | 34 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 753 | 34 | } | 754 | 7 | } | 755 | | | 756 | 7 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 757 | 7 | res_offsets); | 758 | 7 | } | 759 | 8 | } |
_ZN5doris12FunctionLeft8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 724 | 8 | size_t size) { | 725 | 8 | auto& res_chars = res.get_chars(); | 726 | 8 | auto& res_offsets = res.get_offsets(); | 727 | 8 | res_offsets.resize(size); | 728 | 8 | const auto& len_data = len_col.get_data(); | 729 | | | 730 | 8 | if constexpr (str_const) { | 731 | 8 | res_chars.reserve(size * (str_col.get_chars().size())); | 732 | | } else { | 733 | | res_chars.reserve(str_col.get_chars().size()); | 734 | | } | 735 | | | 736 | 16 | for (int i = 0; i < size; ++i) { | 737 | 8 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 738 | 8 | int len = len_data[index_check_const<len_const>(i)]; | 739 | 8 | if (len <= 0 || str.empty()) { | 740 | 1 | StringOP::push_empty_string(i, res_chars, res_offsets); | 741 | 1 | continue; | 742 | 1 | } | 743 | | | 744 | 7 | const char* begin = str.begin(); | 745 | 7 | const char* p = begin; | 746 | | | 747 | | if constexpr (is_ascii) { | 748 | | p = begin + std::min(len, static_cast<int>(str.size)); | 749 | 7 | } else { | 750 | 7 | const char* end = str.end(); | 751 | 41 | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 752 | 34 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 753 | 34 | } | 754 | 7 | } | 755 | | | 756 | 7 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 757 | 7 | res_offsets); | 758 | 7 | } | 759 | 8 | } |
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m _ZN5doris12FunctionLeft8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 724 | 36 | size_t size) { | 725 | 36 | auto& res_chars = res.get_chars(); | 726 | 36 | auto& res_offsets = res.get_offsets(); | 727 | 36 | res_offsets.resize(size); | 728 | 36 | const auto& len_data = len_col.get_data(); | 729 | | | 730 | | if constexpr (str_const) { | 731 | | res_chars.reserve(size * (str_col.get_chars().size())); | 732 | 36 | } else { | 733 | 36 | res_chars.reserve(str_col.get_chars().size()); | 734 | 36 | } | 735 | | | 736 | 72 | for (int i = 0; i < size; ++i) { | 737 | 36 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 738 | 36 | int len = len_data[index_check_const<len_const>(i)]; | 739 | 36 | if (len <= 0 || str.empty()) { | 740 | 10 | StringOP::push_empty_string(i, res_chars, res_offsets); | 741 | 10 | continue; | 742 | 10 | } | 743 | | | 744 | 26 | const char* begin = str.begin(); | 745 | 26 | const char* p = begin; | 746 | | | 747 | 26 | if constexpr (is_ascii) { | 748 | 26 | p = begin + std::min(len, static_cast<int>(str.size)); | 749 | | } else { | 750 | | const char* end = str.end(); | 751 | | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 752 | | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 753 | | } | 754 | | } | 755 | | | 756 | 26 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 757 | 26 | res_offsets); | 758 | 26 | } | 759 | 36 | } |
_ZN5doris12FunctionLeft8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 724 | 36 | size_t size) { | 725 | 36 | auto& res_chars = res.get_chars(); | 726 | 36 | auto& res_offsets = res.get_offsets(); | 727 | 36 | res_offsets.resize(size); | 728 | 36 | const auto& len_data = len_col.get_data(); | 729 | | | 730 | | if constexpr (str_const) { | 731 | | res_chars.reserve(size * (str_col.get_chars().size())); | 732 | 36 | } else { | 733 | 36 | res_chars.reserve(str_col.get_chars().size()); | 734 | 36 | } | 735 | | | 736 | 72 | for (int i = 0; i < size; ++i) { | 737 | 36 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 738 | 36 | int len = len_data[index_check_const<len_const>(i)]; | 739 | 36 | if (len <= 0 || str.empty()) { | 740 | 10 | StringOP::push_empty_string(i, res_chars, res_offsets); | 741 | 10 | continue; | 742 | 10 | } | 743 | | | 744 | 26 | const char* begin = str.begin(); | 745 | 26 | const char* p = begin; | 746 | | | 747 | 26 | if constexpr (is_ascii) { | 748 | 26 | p = begin + std::min(len, static_cast<int>(str.size)); | 749 | | } else { | 750 | | const char* end = str.end(); | 751 | | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 752 | | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 753 | | } | 754 | | } | 755 | | | 756 | 26 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 757 | 26 | res_offsets); | 758 | 26 | } | 759 | 36 | } |
_ZN5doris12FunctionLeft8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m Line | Count | Source | 724 | 36 | size_t size) { | 725 | 36 | auto& res_chars = res.get_chars(); | 726 | 36 | auto& res_offsets = res.get_offsets(); | 727 | 36 | res_offsets.resize(size); | 728 | 36 | const auto& len_data = len_col.get_data(); | 729 | | | 730 | 36 | if constexpr (str_const) { | 731 | 36 | res_chars.reserve(size * (str_col.get_chars().size())); | 732 | | } else { | 733 | | res_chars.reserve(str_col.get_chars().size()); | 734 | | } | 735 | | | 736 | 72 | for (int i = 0; i < size; ++i) { | 737 | 36 | auto str = str_col.get_data_at(index_check_const<str_const>(i)); | 738 | 36 | int len = len_data[index_check_const<len_const>(i)]; | 739 | 36 | if (len <= 0 || str.empty()) { | 740 | 10 | StringOP::push_empty_string(i, res_chars, res_offsets); | 741 | 10 | continue; | 742 | 10 | } | 743 | | | 744 | 26 | const char* begin = str.begin(); | 745 | 26 | const char* p = begin; | 746 | | | 747 | 26 | if constexpr (is_ascii) { | 748 | 26 | p = begin + std::min(len, static_cast<int>(str.size)); | 749 | | } else { | 750 | | const char* end = str.end(); | 751 | | for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) { | 752 | | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 753 | | } | 754 | | } | 755 | | | 756 | 26 | StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars, | 757 | 26 | res_offsets); | 758 | 26 | } | 759 | 36 | } |
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m |
760 | | }; |
761 | | |
762 | | class FunctionRight : public IFunction { |
763 | | public: |
764 | | static constexpr auto name = "right"; |
765 | 87 | static FunctionPtr create() { return std::make_shared<FunctionRight>(); } |
766 | 1 | String get_name() const override { return name; } |
767 | 79 | size_t get_number_of_arguments() const override { return 2; } |
768 | 79 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
769 | 79 | return std::make_shared<DataTypeString>(); |
770 | 79 | } |
771 | | |
772 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
773 | 61 | uint32_t result, size_t input_rows_count) const override { |
774 | 61 | auto int_type = std::make_shared<DataTypeInt32>(); |
775 | 61 | auto params1 = ColumnInt32::create(input_rows_count); |
776 | 61 | auto params2 = ColumnInt32::create(input_rows_count); |
777 | 61 | size_t num_columns_without_result = block.columns(); |
778 | | |
779 | | // params1 = max(arg[1], -len(arg)) |
780 | 61 | auto& index_data = params1->get_data(); |
781 | 61 | auto& strlen_data = params2->get_data(); |
782 | | |
783 | 61 | auto str_col = |
784 | 61 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
785 | 61 | const auto* str_column = assert_cast<const ColumnString*>(str_col.get()); |
786 | 61 | auto pos_col = |
787 | 61 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
788 | 61 | const auto& pos_data = assert_cast<const ColumnInt32*>(pos_col.get())->get_data(); |
789 | | |
790 | 147 | for (int i = 0; i < input_rows_count; ++i) { |
791 | 86 | auto str = str_column->get_data_at(i); |
792 | 86 | strlen_data[i] = simd::VStringFunctions::get_char_len(str.data, str.size); |
793 | 86 | } |
794 | | |
795 | 147 | for (int i = 0; i < input_rows_count; ++i) { |
796 | 86 | index_data[i] = std::max(-pos_data[i], -strlen_data[i]); |
797 | 86 | } |
798 | | |
799 | 61 | block.insert({std::move(params1), int_type, "index"}); |
800 | 61 | block.insert({std::move(params2), int_type, "strlen"}); |
801 | | |
802 | 61 | ColumnNumbers temp_arguments(3); |
803 | 61 | temp_arguments[0] = arguments[0]; |
804 | 61 | temp_arguments[1] = num_columns_without_result; |
805 | 61 | temp_arguments[2] = num_columns_without_result + 1; |
806 | 61 | SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count); |
807 | 61 | return Status::OK(); |
808 | 61 | } |
809 | | }; |
810 | | |
811 | | struct NullOrEmptyImpl { |
812 | 0 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeUInt8>()}; } |
813 | | |
814 | | static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
815 | 5 | uint32_t result, size_t input_rows_count, bool reverse) { |
816 | 5 | auto res_map = ColumnUInt8::create(input_rows_count, 0); |
817 | | |
818 | 5 | auto column = block.get_by_position(arguments[0]).column; |
819 | 5 | if (auto* nullable = check_and_get_column<const ColumnNullable>(*column)) { |
820 | 5 | column = nullable->get_nested_column_ptr(); |
821 | 5 | VectorizedUtils::update_null_map(res_map->get_data(), nullable->get_null_map_data()); |
822 | 5 | } |
823 | 5 | auto str_col = assert_cast<const ColumnString*>(column.get()); |
824 | 5 | const auto& offsets = str_col->get_offsets(); |
825 | | |
826 | 5 | auto& res_map_data = res_map->get_data(); |
827 | 13 | for (int i = 0; i < input_rows_count; ++i) { |
828 | 8 | int size = offsets[i] - offsets[i - 1]; |
829 | 8 | res_map_data[i] |= (size == 0); |
830 | 8 | } |
831 | 5 | if (reverse) { |
832 | 0 | for (int i = 0; i < input_rows_count; ++i) { |
833 | 0 | res_map_data[i] = !res_map_data[i]; |
834 | 0 | } |
835 | 0 | } |
836 | | |
837 | 5 | block.replace_by_position(result, std::move(res_map)); |
838 | 5 | return Status::OK(); |
839 | 5 | } |
840 | | }; |
841 | | |
842 | | class FunctionNullOrEmpty : public IFunction { |
843 | | public: |
844 | | static constexpr auto name = "null_or_empty"; |
845 | 13 | static FunctionPtr create() { return std::make_shared<FunctionNullOrEmpty>(); } |
846 | 1 | String get_name() const override { return name; } |
847 | 5 | size_t get_number_of_arguments() const override { return 1; } |
848 | | |
849 | 5 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
850 | 5 | return std::make_shared<DataTypeUInt8>(); |
851 | 5 | } |
852 | | |
853 | 10 | bool use_default_implementation_for_nulls() const override { return false; } |
854 | | |
855 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
856 | 5 | uint32_t result, size_t input_rows_count) const override { |
857 | 5 | RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result, |
858 | 5 | input_rows_count, false)); |
859 | 5 | return Status::OK(); |
860 | 5 | } |
861 | | }; |
862 | | |
863 | | class FunctionNotNullOrEmpty : public IFunction { |
864 | | public: |
865 | | static constexpr auto name = "not_null_or_empty"; |
866 | 8 | static FunctionPtr create() { return std::make_shared<FunctionNotNullOrEmpty>(); } |
867 | 1 | String get_name() const override { return name; } |
868 | 0 | size_t get_number_of_arguments() const override { return 1; } |
869 | | |
870 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
871 | 0 | return std::make_shared<DataTypeUInt8>(); |
872 | 0 | } |
873 | | |
874 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
875 | | |
876 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
877 | 0 | uint32_t result, size_t input_rows_count) const override { |
878 | 0 | RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result, |
879 | 0 | input_rows_count, true)); |
880 | 0 | return Status::OK(); |
881 | 0 | } |
882 | | }; |
883 | | |
884 | | class FunctionStringConcat : public IFunction { |
885 | | public: |
// State prepared by open() and read by execute_impl().
struct ConcatState {
    // True when every argument after the first is a non-NULL constant.
    bool use_state {false};
    // Concatenation of those constant arguments, in argument order.
    std::string tail;
};
890 | | |
891 | | static constexpr auto name = "concat"; |
892 | 561 | static FunctionPtr create() { return std::make_shared<FunctionStringConcat>(); } |
893 | 0 | String get_name() const override { return name; } |
894 | 0 | size_t get_number_of_arguments() const override { return 0; } |
895 | 554 | bool is_variadic() const override { return true; } |
896 | | |
897 | 553 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
898 | 553 | return std::make_shared<DataTypeString>(); |
899 | 553 | } |
900 | | |
901 | 1.29k | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
902 | 1.29k | if (scope == FunctionContext::THREAD_LOCAL) { |
903 | 743 | return Status::OK(); |
904 | 743 | } |
905 | 554 | std::shared_ptr<ConcatState> state = std::make_shared<ConcatState>(); |
906 | | |
907 | 554 | context->set_function_state(scope, state); |
908 | | |
909 | 554 | state->use_state = true; |
910 | | |
911 | | // Optimize function calls like this: |
912 | | // concat(col, "123", "abc", "456") -> tail = "123abc456" |
913 | 960 | for (size_t i = 1; i < context->get_num_args(); i++) { |
914 | 763 | const auto* column_string = context->get_constant_col(i); |
915 | 763 | if (column_string == nullptr) { |
916 | 329 | state->use_state = false; |
917 | 329 | return IFunction::open(context, scope); |
918 | 329 | } |
919 | 434 | auto string_vale = column_string->column_ptr->get_data_at(0); |
920 | 434 | if (string_vale.data == nullptr) { |
921 | | // For concat(col, null), it is handled by default_implementation_for_nulls |
922 | 28 | state->use_state = false; |
923 | 28 | return IFunction::open(context, scope); |
924 | 28 | } |
925 | | |
926 | 406 | state->tail.append(string_vale.begin(), string_vale.size); |
927 | 406 | } |
928 | | |
929 | | // The reserve is used here to allow the usage of memcpy_small_allow_read_write_overflow15 below. |
930 | 197 | state->tail.reserve(state->tail.size() + 16); |
931 | | |
932 | 197 | return IFunction::open(context, scope); |
933 | 554 | } |
934 | | |
935 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
936 | 478 | uint32_t result, size_t input_rows_count) const override { |
937 | 478 | DCHECK_GE(arguments.size(), 1); |
938 | | |
939 | 478 | if (arguments.size() == 1) { |
940 | 3 | block.get_by_position(result).column = block.get_by_position(arguments[0]).column; |
941 | 3 | return Status::OK(); |
942 | 3 | } |
943 | 475 | auto* concat_state = reinterpret_cast<ConcatState*>( |
944 | 475 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
945 | 475 | if (!concat_state) { |
946 | 0 | return Status::RuntimeError("funciton context for function '{}' must have ConcatState;", |
947 | 0 | get_name()); |
948 | 0 | } |
949 | 475 | if (concat_state->use_state) { |
950 | 175 | const auto& [col, is_const] = |
951 | 175 | unpack_if_const(block.get_by_position(arguments[0]).column); |
952 | 175 | const auto* col_str = assert_cast<const ColumnString*>(col.get()); |
953 | 175 | if (is_const) { |
954 | 0 | return execute_const<true>(concat_state, block, col_str, result, input_rows_count); |
955 | 175 | } else { |
956 | 175 | return execute_const<false>(concat_state, block, col_str, result, input_rows_count); |
957 | 175 | } |
958 | | |
959 | 300 | } else { |
960 | 300 | return execute_vecotr(block, arguments, result, input_rows_count); |
961 | 300 | } |
962 | 475 | } |
963 | | |
964 | | Status execute_vecotr(Block& block, const ColumnNumbers& arguments, uint32_t result, |
965 | 300 | size_t input_rows_count) const { |
966 | 300 | int argument_size = arguments.size(); |
967 | 300 | std::vector<ColumnPtr> argument_columns(argument_size); |
968 | | |
969 | 300 | std::vector<const ColumnString::Offsets*> offsets_list(argument_size); |
970 | 300 | std::vector<const ColumnString::Chars*> chars_list(argument_size); |
971 | 300 | std::vector<bool> is_const_args(argument_size); |
972 | | |
973 | 1.11k | for (int i = 0; i < argument_size; ++i) { |
974 | 818 | const auto& [col, is_const] = |
975 | 818 | unpack_if_const(block.get_by_position(arguments[i]).column); |
976 | | |
977 | 818 | const auto* col_str = assert_cast<const ColumnString*>(col.get()); |
978 | 818 | offsets_list[i] = &col_str->get_offsets(); |
979 | 818 | chars_list[i] = &col_str->get_chars(); |
980 | 818 | is_const_args[i] = is_const; |
981 | 818 | } |
982 | | |
983 | 300 | auto res = ColumnString::create(); |
984 | 300 | auto& res_data = res->get_chars(); |
985 | 300 | auto& res_offset = res->get_offsets(); |
986 | | |
987 | 300 | res_offset.resize(input_rows_count); |
988 | 300 | size_t res_reserve_size = 0; |
989 | 1.11k | for (size_t i = 0; i < argument_size; ++i) { |
990 | 818 | if (is_const_args[i]) { |
991 | 309 | res_reserve_size += (*offsets_list[i])[0] * input_rows_count; |
992 | 509 | } else { |
993 | 509 | res_reserve_size += (*offsets_list[i])[input_rows_count - 1]; |
994 | 509 | } |
995 | 818 | } |
996 | | |
997 | 300 | ColumnString::check_chars_length(res_reserve_size, 0); |
998 | | |
999 | 300 | res_data.resize(res_reserve_size); |
1000 | | |
1001 | 300 | auto* data = res_data.data(); |
1002 | 300 | size_t dst_offset = 0; |
1003 | | |
1004 | 2.14k | for (size_t i = 0; i < input_rows_count; ++i) { |
1005 | 7.17k | for (size_t j = 0; j < argument_size; ++j) { |
1006 | 5.33k | const auto& current_offsets = *offsets_list[j]; |
1007 | 5.33k | const auto& current_chars = *chars_list[j]; |
1008 | 5.33k | auto idx = index_check_const(i, is_const_args[j]); |
1009 | 5.33k | const auto size = current_offsets[idx] - current_offsets[idx - 1]; |
1010 | 5.33k | if (size > 0) { |
1011 | 5.24k | memcpy_small_allow_read_write_overflow15( |
1012 | 5.24k | data + dst_offset, current_chars.data() + current_offsets[idx - 1], |
1013 | 5.24k | size); |
1014 | 5.24k | dst_offset += size; |
1015 | 5.24k | } |
1016 | 5.33k | } |
1017 | 1.84k | res_offset[i] = dst_offset; |
1018 | 1.84k | } |
1019 | | |
1020 | 300 | block.get_by_position(result).column = std::move(res); |
1021 | 300 | return Status::OK(); |
1022 | 300 | } |
1023 | | |
1024 | | template <bool is_const> |
1025 | | Status execute_const(ConcatState* concat_state, Block& block, const ColumnString* col_str, |
1026 | 175 | uint32_t result, size_t input_rows_count) const { |
1027 | | // using tail optimize |
1028 | | |
1029 | 175 | auto res = ColumnString::create(); |
1030 | 175 | auto& res_data = res->get_chars(); |
1031 | 175 | auto& res_offset = res->get_offsets(); |
1032 | 175 | res_offset.resize(input_rows_count); |
1033 | | |
1034 | 175 | size_t res_reserve_size = 0; |
1035 | 175 | if constexpr (is_const) { |
1036 | 0 | res_reserve_size = col_str->get_offsets()[0] * input_rows_count; |
1037 | 175 | } else { |
1038 | 175 | res_reserve_size = col_str->get_offsets()[input_rows_count - 1]; |
1039 | 175 | } |
1040 | 175 | res_reserve_size += concat_state->tail.size() * input_rows_count; |
1041 | | |
1042 | 175 | ColumnString::check_chars_length(res_reserve_size, 0); |
1043 | 175 | res_data.resize(res_reserve_size); |
1044 | | |
1045 | 175 | const auto& tail = concat_state->tail; |
1046 | 175 | auto* data = res_data.data(); |
1047 | 175 | size_t dst_offset = 0; |
1048 | | |
1049 | 352 | for (size_t i = 0; i < input_rows_count; ++i) { |
1050 | 177 | const auto idx = index_check_const<is_const>(i); |
1051 | 177 | StringRef str_val = col_str->get_data_at(idx); |
1052 | | // copy column |
1053 | 177 | memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size); |
1054 | 177 | dst_offset += str_val.size; |
1055 | | // copy tail |
1056 | 177 | memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size()); |
1057 | 177 | dst_offset += tail.size(); |
1058 | 177 | res_offset[i] = dst_offset; |
1059 | 177 | } |
1060 | 175 | block.get_by_position(result).column = std::move(res); |
1061 | 175 | return Status::OK(); |
1062 | 175 | } Unexecuted instantiation: _ZNK5doris20FunctionStringConcat13execute_constILb1EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm _ZNK5doris20FunctionStringConcat13execute_constILb0EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm Line | Count | Source | 1026 | 175 | uint32_t result, size_t input_rows_count) const { | 1027 | | // using tail optimize | 1028 | | | 1029 | 175 | auto res = ColumnString::create(); | 1030 | 175 | auto& res_data = res->get_chars(); | 1031 | 175 | auto& res_offset = res->get_offsets(); | 1032 | 175 | res_offset.resize(input_rows_count); | 1033 | | | 1034 | 175 | size_t res_reserve_size = 0; | 1035 | | if constexpr (is_const) { | 1036 | | res_reserve_size = col_str->get_offsets()[0] * input_rows_count; | 1037 | 175 | } else { | 1038 | 175 | res_reserve_size = col_str->get_offsets()[input_rows_count - 1]; | 1039 | 175 | } | 1040 | 175 | res_reserve_size += concat_state->tail.size() * input_rows_count; | 1041 | | | 1042 | 175 | ColumnString::check_chars_length(res_reserve_size, 0); | 1043 | 175 | res_data.resize(res_reserve_size); | 1044 | | | 1045 | 175 | const auto& tail = concat_state->tail; | 1046 | 175 | auto* data = res_data.data(); | 1047 | 175 | size_t dst_offset = 0; | 1048 | | | 1049 | 352 | for (size_t i = 0; i < input_rows_count; ++i) { | 1050 | 177 | const auto idx = index_check_const<is_const>(i); | 1051 | 177 | StringRef str_val = col_str->get_data_at(idx); | 1052 | | // copy column | 1053 | 177 | memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size); | 1054 | 177 | dst_offset += str_val.size; | 1055 | | // copy tail | 1056 | 177 | memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size()); | 1057 | 177 | dst_offset += tail.size(); | 1058 | 177 | res_offset[i] = dst_offset; | 1059 | 177 | } | 1060 | 175 | block.get_by_position(result).column = std::move(res); | 1061 | 175 | return Status::OK(); | 1062 | 175 | } |
|
1063 | | }; |
1064 | | |
1065 | | class FunctionStringElt : public IFunction { |
1066 | | public: |
1067 | | static constexpr auto name = "elt"; |
1068 | 366 | static FunctionPtr create() { return std::make_shared<FunctionStringElt>(); } |
1069 | 0 | String get_name() const override { return name; } |
1070 | 0 | size_t get_number_of_arguments() const override { return 0; } |
1071 | 359 | bool is_variadic() const override { return true; } |
1072 | | |
1073 | 358 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1074 | 358 | return make_nullable(std::make_shared<DataTypeString>()); |
1075 | 358 | } |
1076 | 716 | bool use_default_implementation_for_nulls() const override { return false; } |
1077 | | |
1078 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1079 | 358 | uint32_t result, size_t input_rows_count) const override { |
1080 | 358 | int arguent_size = arguments.size(); |
1081 | 358 | int num_children = arguent_size - 1; |
1082 | 358 | auto res = ColumnString::create(); |
1083 | | |
1084 | 358 | if (auto const_column = check_and_get_column<ColumnConst>( |
1085 | 358 | *block.get_by_position(arguments[0]).column)) { |
1086 | 153 | auto data = const_column->get_data_at(0); |
1087 | | // return NULL, pos is null or pos < 0 or pos > num_children |
1088 | 153 | auto is_null = data.data == nullptr; |
1089 | 153 | auto pos = is_null ? 0 : *(Int32*)data.data; |
1090 | 153 | is_null = pos <= 0 || pos > num_children; |
1091 | | |
1092 | 153 | auto null_map = ColumnUInt8::create(input_rows_count, is_null); |
1093 | 153 | if (is_null) { |
1094 | 135 | res->insert_many_defaults(input_rows_count); |
1095 | 135 | } else { |
1096 | 18 | auto& target_column = block.get_by_position(arguments[pos]).column; |
1097 | 18 | if (auto target_const_column = check_and_get_column<ColumnConst>(*target_column)) { |
1098 | 6 | auto target_data = target_const_column->get_data_at(0); |
1099 | | // return NULL, no target data |
1100 | 6 | if (target_data.data == nullptr) { |
1101 | 0 | null_map = ColumnUInt8::create(input_rows_count, true); |
1102 | 0 | res->insert_many_defaults(input_rows_count); |
1103 | 6 | } else { |
1104 | 6 | res->insert_data_repeatedly(target_data.data, target_data.size, |
1105 | 6 | input_rows_count); |
1106 | 6 | } |
1107 | 12 | } else if (auto target_nullable_column = |
1108 | 12 | check_and_get_column<ColumnNullable>(*target_column)) { |
1109 | 12 | auto& target_null_map = target_nullable_column->get_null_map_data(); |
1110 | 12 | VectorizedUtils::update_null_map( |
1111 | 12 | assert_cast<ColumnUInt8&>(*null_map).get_data(), target_null_map); |
1112 | | |
1113 | 12 | auto& target_str_column = assert_cast<const ColumnString&>( |
1114 | 12 | target_nullable_column->get_nested_column()); |
1115 | 12 | res->get_chars().assign(target_str_column.get_chars().begin(), |
1116 | 12 | target_str_column.get_chars().end()); |
1117 | 12 | res->get_offsets().assign(target_str_column.get_offsets().begin(), |
1118 | 12 | target_str_column.get_offsets().end()); |
1119 | 12 | } else { |
1120 | 0 | auto& target_str_column = assert_cast<const ColumnString&>(*target_column); |
1121 | 0 | res->get_chars().assign(target_str_column.get_chars().begin(), |
1122 | 0 | target_str_column.get_chars().end()); |
1123 | 0 | res->get_offsets().assign(target_str_column.get_offsets().begin(), |
1124 | 0 | target_str_column.get_offsets().end()); |
1125 | 0 | } |
1126 | 18 | } |
1127 | 153 | block.get_by_position(result).column = |
1128 | 153 | ColumnNullable::create(std::move(res), std::move(null_map)); |
1129 | 205 | } else if (auto pos_null_column = check_and_get_column<ColumnNullable>( |
1130 | 205 | *block.get_by_position(arguments[0]).column)) { |
1131 | 205 | auto& pos_column = |
1132 | 205 | assert_cast<const ColumnInt32&>(pos_null_column->get_nested_column()); |
1133 | 205 | auto& pos_null_map = pos_null_column->get_null_map_data(); |
1134 | 205 | auto null_map = ColumnUInt8::create(input_rows_count, false); |
1135 | 205 | auto& res_null_map = assert_cast<ColumnUInt8&>(*null_map).get_data(); |
1136 | | |
1137 | 460 | for (size_t i = 0; i < input_rows_count; ++i) { |
1138 | 255 | auto pos = pos_column.get_element(i); |
1139 | 255 | res_null_map[i] = |
1140 | 255 | pos_null_map[i] || pos <= 0 || pos > num_children || |
1141 | 255 | block.get_by_position(arguments[pos]).column->get_data_at(i).data == |
1142 | 30 | nullptr; |
1143 | 255 | if (res_null_map[i]) { |
1144 | 225 | res->insert_default(); |
1145 | 225 | } else { |
1146 | 30 | auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i); |
1147 | 30 | res->insert_data(insert_data.data, insert_data.size); |
1148 | 30 | } |
1149 | 255 | } |
1150 | 205 | block.get_by_position(result).column = |
1151 | 205 | ColumnNullable::create(std::move(res), std::move(null_map)); |
1152 | 205 | } else { |
1153 | 0 | auto& pos_column = |
1154 | 0 | assert_cast<const ColumnInt32&>(*block.get_by_position(arguments[0]).column); |
1155 | 0 | auto null_map = ColumnUInt8::create(input_rows_count, false); |
1156 | 0 | auto& res_null_map = assert_cast<ColumnUInt8&>(*null_map).get_data(); |
1157 | |
|
1158 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
1159 | 0 | auto pos = pos_column.get_element(i); |
1160 | 0 | res_null_map[i] = |
1161 | 0 | pos <= 0 || pos > num_children || |
1162 | 0 | block.get_by_position(arguments[pos]).column->get_data_at(i).data == |
1163 | 0 | nullptr; |
1164 | 0 | if (res_null_map[i]) { |
1165 | 0 | res->insert_default(); |
1166 | 0 | } else { |
1167 | 0 | auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i); |
1168 | 0 | res->insert_data(insert_data.data, insert_data.size); |
1169 | 0 | } |
1170 | 0 | } |
1171 | 0 | block.get_by_position(result).column = |
1172 | 0 | ColumnNullable::create(std::move(res), std::move(null_map)); |
1173 | 0 | } |
1174 | 358 | return Status::OK(); |
1175 | 358 | } |
1176 | | }; |
1177 | | |
1178 | | // concat_ws (string,string....) or (string, Array) |
1179 | | // TODO: avoid use fmtlib |
1180 | | class FunctionStringConcatWs : public IFunction { |
1181 | | public: |
1182 | | using Chars = ColumnString::Chars; |
1183 | | using Offsets = ColumnString::Offsets; |
1184 | | |
1185 | | static constexpr auto name = "concat_ws"; |
1186 | 457 | static FunctionPtr create() { return std::make_shared<FunctionStringConcatWs>(); } |
1187 | 0 | String get_name() const override { return name; } |
1188 | 0 | size_t get_number_of_arguments() const override { return 0; } |
1189 | 450 | bool is_variadic() const override { return true; } |
1190 | | |
1191 | 449 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1192 | 449 | const IDataType* first_type = arguments[0].get(); |
1193 | 449 | if (first_type->is_nullable()) { |
1194 | 449 | return make_nullable(std::make_shared<DataTypeString>()); |
1195 | 449 | } else { |
1196 | 0 | return std::make_shared<DataTypeString>(); |
1197 | 0 | } |
1198 | 449 | } |
1199 | 898 | bool use_default_implementation_for_nulls() const override { return false; } |
1200 | | |
1201 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1202 | 449 | uint32_t result, size_t input_rows_count) const override { |
1203 | 449 | DCHECK_GE(arguments.size(), 2); |
1204 | 449 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
1205 | | // we create a zero column to simply implement |
1206 | 449 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); |
1207 | 449 | auto res = ColumnString::create(); |
1208 | 449 | bool is_null_type = block.get_by_position(arguments[0]).type.get()->is_nullable(); |
1209 | 449 | size_t argument_size = arguments.size(); |
1210 | 449 | std::vector<const Offsets*> offsets_list(argument_size); |
1211 | 449 | std::vector<const Chars*> chars_list(argument_size); |
1212 | 449 | std::vector<const ColumnUInt8::Container*> null_list(argument_size); |
1213 | | |
1214 | 449 | std::vector<ColumnPtr> argument_columns(argument_size); |
1215 | 449 | std::vector<ColumnPtr> argument_null_columns(argument_size); |
1216 | | |
1217 | 1.53k | for (size_t i = 0; i < argument_size; ++i) { |
1218 | 1.08k | argument_columns[i] = |
1219 | 1.08k | block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); |
1220 | 1.08k | if (const auto* nullable = |
1221 | 1.08k | check_and_get_column<const ColumnNullable>(*argument_columns[i])) { |
1222 | | // Danger: Here must dispose the null map data first! Because |
1223 | | // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem |
1224 | | // of column nullable mem of null map |
1225 | 1.08k | null_list[i] = &nullable->get_null_map_data(); |
1226 | 1.08k | argument_null_columns[i] = nullable->get_null_map_column_ptr(); |
1227 | 1.08k | argument_columns[i] = nullable->get_nested_column_ptr(); |
1228 | 1.08k | } else { |
1229 | 0 | null_list[i] = &const_null_map->get_data(); |
1230 | 0 | } |
1231 | | |
1232 | 1.08k | if (is_column<ColumnArray>(argument_columns[i].get())) { |
1233 | 36 | continue; |
1234 | 36 | } |
1235 | | |
1236 | 1.05k | const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get()); |
1237 | 1.05k | offsets_list[i] = &col_str->get_offsets(); |
1238 | 1.05k | chars_list[i] = &col_str->get_chars(); |
1239 | 1.05k | } |
1240 | | |
1241 | 449 | auto& res_data = res->get_chars(); |
1242 | 449 | auto& res_offset = res->get_offsets(); |
1243 | 449 | res_offset.resize(input_rows_count); |
1244 | | |
1245 | 449 | VectorizedUtils::update_null_map(null_map->get_data(), *null_list[0]); |
1246 | 449 | fmt::memory_buffer buffer; |
1247 | 449 | std::vector<std::string_view> views; |
1248 | | |
1249 | 449 | if (is_column<ColumnArray>(argument_columns[1].get())) { |
1250 | | // Determine if the nested type of the array is String |
1251 | 36 | const auto& array_column = reinterpret_cast<const ColumnArray&>(*argument_columns[1]); |
1252 | 36 | if (!array_column.get_data().is_column_string()) { |
1253 | 0 | return Status::NotSupported( |
1254 | 0 | fmt::format("unsupported nested array of type {} for function {}", |
1255 | 0 | is_column_nullable(array_column.get_data()) |
1256 | 0 | ? array_column.get_data().get_name() |
1257 | 0 | : array_column.get_data().get_name(), |
1258 | 0 | get_name())); |
1259 | 0 | } |
1260 | | // Concat string in array |
1261 | 36 | _execute_array(input_rows_count, array_column, buffer, views, offsets_list, chars_list, |
1262 | 36 | null_list, res_data, res_offset); |
1263 | | |
1264 | 413 | } else { |
1265 | | // Concat string |
1266 | 413 | _execute_string(input_rows_count, argument_size, buffer, views, offsets_list, |
1267 | 413 | chars_list, null_list, res_data, res_offset); |
1268 | 413 | } |
1269 | 449 | if (is_null_type) { |
1270 | 449 | block.get_by_position(result).column = |
1271 | 449 | ColumnNullable::create(std::move(res), std::move(null_map)); |
1272 | 449 | } else { |
1273 | 0 | block.get_by_position(result).column = std::move(res); |
1274 | 0 | } |
1275 | 449 | return Status::OK(); |
1276 | 449 | } |
1277 | | |
1278 | | private: |
1279 | | void _execute_array(const size_t& input_rows_count, const ColumnArray& array_column, |
1280 | | fmt::memory_buffer& buffer, std::vector<std::string_view>& views, |
1281 | | const std::vector<const Offsets*>& offsets_list, |
1282 | | const std::vector<const Chars*>& chars_list, |
1283 | | const std::vector<const ColumnUInt8::Container*>& null_list, |
1284 | 36 | Chars& res_data, Offsets& res_offset) const { |
1285 | | // Get array nested column |
1286 | 36 | const UInt8* array_nested_null_map = nullptr; |
1287 | 36 | ColumnPtr array_nested_column = nullptr; |
1288 | | |
1289 | 36 | if (is_column_nullable(array_column.get_data())) { |
1290 | 36 | const auto& array_nested_null_column = |
1291 | 36 | reinterpret_cast<const ColumnNullable&>(array_column.get_data()); |
1292 | | // String's null map in array |
1293 | 36 | array_nested_null_map = |
1294 | 36 | array_nested_null_column.get_null_map_column().get_data().data(); |
1295 | 36 | array_nested_column = array_nested_null_column.get_nested_column_ptr(); |
1296 | 36 | } else { |
1297 | 0 | array_nested_column = array_column.get_data_ptr(); |
1298 | 0 | } |
1299 | | |
1300 | 36 | const auto& string_column = reinterpret_cast<const ColumnString&>(*array_nested_column); |
1301 | 36 | const Chars& string_src_chars = string_column.get_chars(); |
1302 | 36 | const auto& src_string_offsets = string_column.get_offsets(); |
1303 | 36 | const auto& src_array_offsets = array_column.get_offsets(); |
1304 | 36 | size_t current_src_array_offset = 0; |
1305 | | |
1306 | | // Concat string in array |
1307 | 76 | for (size_t i = 0; i < input_rows_count; ++i) { |
1308 | 40 | auto& sep_offsets = *offsets_list[0]; |
1309 | 40 | auto& sep_chars = *chars_list[0]; |
1310 | 40 | auto& sep_nullmap = *null_list[0]; |
1311 | | |
1312 | 40 | if (sep_nullmap[i]) { |
1313 | 8 | res_offset[i] = res_data.size(); |
1314 | 8 | current_src_array_offset += src_array_offsets[i] - src_array_offsets[i - 1]; |
1315 | 8 | continue; |
1316 | 8 | } |
1317 | | |
1318 | 32 | int sep_size = sep_offsets[i] - sep_offsets[i - 1]; |
1319 | 32 | const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]); |
1320 | | |
1321 | 32 | std::string_view sep(sep_data, sep_size); |
1322 | 32 | buffer.clear(); |
1323 | 32 | views.clear(); |
1324 | | |
1325 | 32 | for (auto next_src_array_offset = src_array_offsets[i]; |
1326 | 128 | current_src_array_offset < next_src_array_offset; ++current_src_array_offset) { |
1327 | 96 | const auto current_src_string_offset = |
1328 | 96 | current_src_array_offset ? src_string_offsets[current_src_array_offset - 1] |
1329 | 96 | : 0; |
1330 | 96 | size_t bytes_to_copy = |
1331 | 96 | src_string_offsets[current_src_array_offset] - current_src_string_offset; |
1332 | 96 | const char* ptr = |
1333 | 96 | reinterpret_cast<const char*>(&string_src_chars[current_src_string_offset]); |
1334 | | |
1335 | 96 | if (array_nested_null_map == nullptr || |
1336 | 96 | !array_nested_null_map[current_src_array_offset]) { |
1337 | 96 | views.emplace_back(ptr, bytes_to_copy); |
1338 | 96 | } |
1339 | 96 | } |
1340 | | |
1341 | 32 | fmt::format_to(buffer, "{}", fmt::join(views, sep)); |
1342 | | |
1343 | 32 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
1344 | 32 | res_offset); |
1345 | 32 | } |
1346 | 36 | } |
1347 | | |
1348 | | void _execute_string(const size_t& input_rows_count, const size_t& argument_size, |
1349 | | fmt::memory_buffer& buffer, std::vector<std::string_view>& views, |
1350 | | const std::vector<const Offsets*>& offsets_list, |
1351 | | const std::vector<const Chars*>& chars_list, |
1352 | | const std::vector<const ColumnUInt8::Container*>& null_list, |
1353 | 413 | Chars& res_data, Offsets& res_offset) const { |
1354 | | // Concat string |
1355 | 933 | for (size_t i = 0; i < input_rows_count; ++i) { |
1356 | 520 | auto& sep_offsets = *offsets_list[0]; |
1357 | 520 | auto& sep_chars = *chars_list[0]; |
1358 | 520 | auto& sep_nullmap = *null_list[0]; |
1359 | 520 | if (sep_nullmap[i]) { |
1360 | 72 | res_offset[i] = res_data.size(); |
1361 | 72 | continue; |
1362 | 72 | } |
1363 | | |
1364 | 448 | int sep_size = sep_offsets[i] - sep_offsets[i - 1]; |
1365 | 448 | const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]); |
1366 | | |
1367 | 448 | std::string_view sep(sep_data, sep_size); |
1368 | 448 | buffer.clear(); |
1369 | 448 | views.clear(); |
1370 | 1.04k | for (size_t j = 1; j < argument_size; ++j) { |
1371 | 600 | auto& current_offsets = *offsets_list[j]; |
1372 | 600 | auto& current_chars = *chars_list[j]; |
1373 | 600 | auto& current_nullmap = *null_list[j]; |
1374 | 600 | int size = current_offsets[i] - current_offsets[i - 1]; |
1375 | 600 | const char* ptr = |
1376 | 600 | reinterpret_cast<const char*>(¤t_chars[current_offsets[i - 1]]); |
1377 | 600 | if (!current_nullmap[i]) { |
1378 | 548 | views.emplace_back(ptr, size); |
1379 | 548 | } |
1380 | 600 | } |
1381 | 448 | fmt::format_to(buffer, "{}", fmt::join(views, sep)); |
1382 | 448 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
1383 | 448 | res_offset); |
1384 | 448 | } |
1385 | 413 | } |
1386 | | }; |
1387 | | |
1388 | | class FunctionStringRepeat : public IFunction { |
1389 | | public: |
1390 | | static constexpr auto name = "repeat"; |
1391 | 189 | static FunctionPtr create() { return std::make_shared<FunctionStringRepeat>(); } |
1392 | 1 | String get_name() const override { return name; } |
1393 | 181 | size_t get_number_of_arguments() const override { return 2; } |
1394 | | // should set NULL value of nested data to default, |
1395 | | // as iff it's not inited and invalid, the repeat result of length is so large cause overflow |
1396 | 163 | bool need_replace_null_data_to_default() const override { return true; } |
1397 | | |
1398 | 181 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1399 | 181 | return make_nullable(std::make_shared<DataTypeString>()); |
1400 | 181 | } |
1401 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1402 | 163 | uint32_t result, size_t input_rows_count) const override { |
1403 | 163 | DCHECK_EQ(arguments.size(), 2); |
1404 | 163 | auto res = ColumnString::create(); |
1405 | 163 | auto null_map = ColumnUInt8::create(); |
1406 | | |
1407 | 163 | ColumnPtr argument_ptr[2]; |
1408 | 163 | argument_ptr[0] = |
1409 | 163 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
1410 | 163 | argument_ptr[1] = block.get_by_position(arguments[1]).column; |
1411 | | |
1412 | 163 | if (const auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) { |
1413 | 163 | if (const auto* col2 = check_and_get_column<ColumnInt32>(*argument_ptr[1])) { |
1414 | 109 | RETURN_IF_ERROR(vector_vector(col1->get_chars(), col1->get_offsets(), |
1415 | 109 | col2->get_data(), res->get_chars(), |
1416 | 109 | res->get_offsets(), null_map->get_data())); |
1417 | 109 | block.replace_by_position( |
1418 | 109 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
1419 | 109 | return Status::OK(); |
1420 | 109 | } else if (const auto* col2_const = |
1421 | 54 | check_and_get_column<ColumnConst>(*argument_ptr[1])) { |
1422 | 54 | DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column())); |
1423 | 54 | int repeat = col2_const->get_int(0); |
1424 | 54 | if (repeat <= 0) { |
1425 | 18 | null_map->get_data().resize_fill(input_rows_count, 0); |
1426 | 18 | res->insert_many_defaults(input_rows_count); |
1427 | 36 | } else { |
1428 | 36 | vector_const(col1->get_chars(), col1->get_offsets(), repeat, res->get_chars(), |
1429 | 36 | res->get_offsets(), null_map->get_data()); |
1430 | 36 | } |
1431 | 54 | block.replace_by_position( |
1432 | 54 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
1433 | 54 | return Status::OK(); |
1434 | 54 | } |
1435 | 163 | } |
1436 | | |
1437 | 0 | return Status::RuntimeError("repeat function get error param: {}, {}", |
1438 | 0 | argument_ptr[0]->get_name(), argument_ptr[1]->get_name()); |
1439 | 163 | } |
1440 | | |
1441 | | Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
1442 | | const ColumnInt32::Container& repeats, ColumnString::Chars& res_data, |
1443 | | ColumnString::Offsets& res_offsets, |
1444 | 109 | ColumnUInt8::Container& null_map) const { |
1445 | 109 | size_t input_row_size = offsets.size(); |
1446 | | |
1447 | 109 | fmt::memory_buffer buffer; |
1448 | 109 | res_offsets.resize(input_row_size); |
1449 | 109 | null_map.resize_fill(input_row_size, 0); |
1450 | 277 | for (ssize_t i = 0; i < input_row_size; ++i) { |
1451 | 168 | buffer.clear(); |
1452 | 168 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
1453 | 168 | size_t size = offsets[i] - offsets[i - 1]; |
1454 | 168 | int repeat = repeats[i]; |
1455 | 168 | if (repeat <= 0) { |
1456 | 56 | StringOP::push_empty_string(i, res_data, res_offsets); |
1457 | 112 | } else { |
1458 | 112 | ColumnString::check_chars_length(repeat * size + res_data.size(), 0); |
1459 | 644 | for (int j = 0; j < repeat; ++j) { |
1460 | 532 | buffer.append(raw_str, raw_str + size); |
1461 | 532 | } |
1462 | 112 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, |
1463 | 112 | res_data, res_offsets); |
1464 | 112 | } |
1465 | 168 | } |
1466 | 109 | return Status::OK(); |
1467 | 109 | } |
1468 | | |
1469 | | // TODO: 1. use pmr::vector<char> replace fmt_buffer may speed up the code |
1470 | | // 2. abstract the `vector_vector` and `vector_const` |
1471 | | // 3. rethink we should use `DEFAULT_MAX_STRING_SIZE` to bigger here |
1472 | | void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
1473 | | int repeat, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, |
1474 | 36 | ColumnUInt8::Container& null_map) const { |
1475 | 36 | size_t input_row_size = offsets.size(); |
1476 | | |
1477 | 36 | fmt::memory_buffer buffer; |
1478 | 36 | res_offsets.resize(input_row_size); |
1479 | 36 | null_map.resize_fill(input_row_size, 0); |
1480 | 72 | for (ssize_t i = 0; i < input_row_size; ++i) { |
1481 | 36 | buffer.clear(); |
1482 | 36 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
1483 | 36 | size_t size = offsets[i] - offsets[i - 1]; |
1484 | 36 | ColumnString::check_chars_length(repeat * size + res_data.size(), 0); |
1485 | | |
1486 | 207 | for (int j = 0; j < repeat; ++j) { |
1487 | 171 | buffer.append(raw_str, raw_str + size); |
1488 | 171 | } |
1489 | 36 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
1490 | 36 | res_offsets); |
1491 | 36 | } |
1492 | 36 | } |
1493 | | }; |
1494 | | |
1495 | | template <typename Impl> |
1496 | | class FunctionStringPad : public IFunction { |
1497 | | public: |
1498 | | static constexpr auto name = Impl::name; |
1499 | 1.38k | static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }_ZN5doris17FunctionStringPadINS_10StringLPadEE6createEv Line | Count | Source | 1499 | 695 | static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE6createEv Line | Count | Source | 1499 | 688 | static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); } |
|
1500 | 2 | String get_name() const override { return name; }_ZNK5doris17FunctionStringPadINS_10StringLPadEE8get_nameB5cxx11Ev Line | Count | Source | 1500 | 1 | String get_name() const override { return name; } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE8get_nameB5cxx11Ev Line | Count | Source | 1500 | 1 | String get_name() const override { return name; } |
|
1501 | 1.36k | size_t get_number_of_arguments() const override { return 3; }_ZNK5doris17FunctionStringPadINS_10StringLPadEE23get_number_of_argumentsEv Line | Count | Source | 1501 | 687 | size_t get_number_of_arguments() const override { return 3; } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE23get_number_of_argumentsEv Line | Count | Source | 1501 | 680 | size_t get_number_of_arguments() const override { return 3; } |
|
1502 | | |
1503 | 1.36k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1504 | 1.36k | return make_nullable(std::make_shared<DataTypeString>()); |
1505 | 1.36k | } _ZNK5doris17FunctionStringPadINS_10StringLPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1503 | 687 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1504 | 687 | return make_nullable(std::make_shared<DataTypeString>()); | 1505 | 687 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1503 | 680 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1504 | 680 | return make_nullable(std::make_shared<DataTypeString>()); | 1505 | 680 | } |
|
1506 | | |
1507 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1508 | 870 | uint32_t result, size_t input_rows_count) const override { |
1509 | 870 | DCHECK_GE(arguments.size(), 3); |
1510 | 870 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
1511 | | // we create a zero column to simply implement |
1512 | 870 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); |
1513 | 870 | auto res = ColumnString::create(); |
1514 | | |
1515 | 870 | ColumnPtr col[3]; |
1516 | 870 | bool col_const[3]; |
1517 | 3.48k | for (size_t i = 0; i < 3; ++i) { |
1518 | 2.61k | std::tie(col[i], col_const[i]) = |
1519 | 2.61k | unpack_if_const(block.get_by_position(arguments[i]).column); |
1520 | 2.61k | } |
1521 | 870 | auto& null_map_data = null_map->get_data(); |
1522 | 870 | auto& res_offsets = res->get_offsets(); |
1523 | 870 | auto& res_chars = res->get_chars(); |
1524 | 870 | res_offsets.resize(input_rows_count); |
1525 | | |
1526 | 870 | const auto* strcol = assert_cast<const ColumnString*>(col[0].get()); |
1527 | 870 | const auto& strcol_offsets = strcol->get_offsets(); |
1528 | 870 | const auto& strcol_chars = strcol->get_chars(); |
1529 | | |
1530 | 870 | const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get()); |
1531 | 870 | const auto& col_len_data = col_len->get_data(); |
1532 | | |
1533 | 870 | const auto* padcol = assert_cast<const ColumnString*>(col[2].get()); |
1534 | 870 | const auto& padcol_offsets = padcol->get_offsets(); |
1535 | 870 | const auto& padcol_chars = padcol->get_chars(); |
1536 | 870 | std::visit( |
1537 | 870 | [&](auto str_const, auto len_const, auto pad_const) { |
1538 | 870 | execute_utf8<str_const, len_const, pad_const>( |
1539 | 870 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, |
1540 | 870 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); |
1541 | 870 | }, _ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 1537 | 63 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 63 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 63 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 63 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 63 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ _ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 1537 | 63 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 63 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 63 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 63 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 63 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 1537 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 62 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 62 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ |
1542 | 870 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
1543 | 870 | make_bool_variant(col_const[2])); |
1544 | | |
1545 | 870 | block.get_by_position(result).column = |
1546 | 870 | ColumnNullable::create(std::move(res), std::move(null_map)); |
1547 | 870 | return Status::OK(); |
1548 | 870 | } _ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1508 | 435 | uint32_t result, size_t input_rows_count) const override { | 1509 | 435 | DCHECK_GE(arguments.size(), 3); | 1510 | 435 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 1511 | | // we create a zero column to simply implement | 1512 | 435 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); | 1513 | 435 | auto res = ColumnString::create(); | 1514 | | | 1515 | 435 | ColumnPtr col[3]; | 1516 | 435 | bool col_const[3]; | 1517 | 1.74k | for (size_t i = 0; i < 3; ++i) { | 1518 | 1.30k | std::tie(col[i], col_const[i]) = | 1519 | 1.30k | unpack_if_const(block.get_by_position(arguments[i]).column); | 1520 | 1.30k | } | 1521 | 435 | auto& null_map_data = null_map->get_data(); | 1522 | 435 | auto& res_offsets = res->get_offsets(); | 1523 | 435 | auto& res_chars = res->get_chars(); | 1524 | 435 | res_offsets.resize(input_rows_count); | 1525 | | | 1526 | 435 | const auto* strcol = assert_cast<const ColumnString*>(col[0].get()); | 1527 | 435 | const auto& strcol_offsets = strcol->get_offsets(); | 1528 | 435 | const auto& strcol_chars = strcol->get_chars(); | 1529 | | | 1530 | 435 | const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get()); | 1531 | 435 | const auto& col_len_data = col_len->get_data(); | 1532 | | | 1533 | 435 | const auto* padcol = assert_cast<const ColumnString*>(col[2].get()); | 1534 | 435 | const auto& padcol_offsets = padcol->get_offsets(); | 1535 | 435 | const auto& padcol_chars = padcol->get_chars(); | 1536 | 435 | std::visit( | 1537 | 435 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 435 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 435 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 435 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 435 | }, | 1542 | 435 | 
make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 1543 | 435 | make_bool_variant(col_const[2])); | 1544 | | | 1545 | 435 | block.get_by_position(result).column = | 1546 | 435 | ColumnNullable::create(std::move(res), std::move(null_map)); | 1547 | 435 | return Status::OK(); | 1548 | 435 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1508 | 435 | uint32_t result, size_t input_rows_count) const override { | 1509 | 435 | DCHECK_GE(arguments.size(), 3); | 1510 | 435 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 1511 | | // we create a zero column to simply implement | 1512 | 435 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); | 1513 | 435 | auto res = ColumnString::create(); | 1514 | | | 1515 | 435 | ColumnPtr col[3]; | 1516 | 435 | bool col_const[3]; | 1517 | 1.74k | for (size_t i = 0; i < 3; ++i) { | 1518 | 1.30k | std::tie(col[i], col_const[i]) = | 1519 | 1.30k | unpack_if_const(block.get_by_position(arguments[i]).column); | 1520 | 1.30k | } | 1521 | 435 | auto& null_map_data = null_map->get_data(); | 1522 | 435 | auto& res_offsets = res->get_offsets(); | 1523 | 435 | auto& res_chars = res->get_chars(); | 1524 | 435 | res_offsets.resize(input_rows_count); | 1525 | | | 1526 | 435 | const auto* strcol = assert_cast<const ColumnString*>(col[0].get()); | 1527 | 435 | const auto& strcol_offsets = strcol->get_offsets(); | 1528 | 435 | const auto& strcol_chars = strcol->get_chars(); | 1529 | | | 1530 | 435 | const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get()); | 1531 | 435 | const auto& col_len_data = col_len->get_data(); | 1532 | | | 1533 | 435 | const auto* padcol = assert_cast<const ColumnString*>(col[2].get()); | 1534 | 435 | const auto& padcol_offsets = padcol->get_offsets(); | 1535 | 435 | const auto& padcol_chars = padcol->get_chars(); | 1536 | 435 | std::visit( | 1537 | 435 | [&](auto str_const, auto len_const, auto pad_const) { | 1538 | 435 | execute_utf8<str_const, len_const, pad_const>( | 1539 | 435 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 1540 | 435 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 1541 | 435 | }, | 1542 | 435 | 
make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 1543 | 435 | make_bool_variant(col_const[2])); | 1544 | | | 1545 | 435 | block.get_by_position(result).column = | 1546 | 435 | ColumnNullable::create(std::move(res), std::move(null_map)); | 1547 | 435 | return Status::OK(); | 1548 | 435 | } |
|
1549 | | |
1550 | | template <bool str_const, bool len_const, bool pad_const> |
1551 | | void execute_utf8(const ColumnString::Offsets& strcol_offsets, |
1552 | | const ColumnString::Chars& strcol_chars, |
1553 | | const ColumnInt32::Container& col_len_data, |
1554 | | const ColumnString::Offsets& padcol_offsets, |
1555 | | const ColumnString::Chars& padcol_chars, ColumnString::Offsets& res_offsets, |
1556 | | ColumnString::Chars& res_chars, ColumnUInt8::Container& null_map_data, |
1557 | 870 | size_t input_rows_count) const { |
1558 | 870 | std::vector<size_t> pad_index; |
1559 | 870 | size_t const_pad_char_size = 0; |
1560 | | // If pad_const = true, initialize pad_index only once. |
1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. |
1562 | 870 | if constexpr (pad_const) { |
1563 | 372 | const_pad_char_size = simd::VStringFunctions::get_char_len( |
1564 | 372 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); |
1565 | 372 | } |
1566 | | |
1567 | 870 | fmt::memory_buffer buffer; |
1568 | 870 | buffer.resize(strcol_chars.size()); |
1569 | 870 | size_t buffer_len = 0; |
1570 | | |
1571 | 1.93k | for (size_t i = 0; i < input_rows_count; ++i) { |
1572 | 1.06k | if constexpr (!pad_const) { |
1573 | 691 | pad_index.clear(); |
1574 | 691 | } |
1575 | 1.06k | const auto len = col_len_data[index_check_const<len_const>(i)]; |
1576 | 1.06k | if (len < 0) { |
1577 | | // return NULL when input length is invalid number |
1578 | 548 | null_map_data[i] = true; |
1579 | 548 | res_offsets[i] = buffer_len; |
1580 | 548 | } else { |
1581 | 515 | const auto str_idx = index_check_const<str_const>(i); |
1582 | 515 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; |
1583 | 515 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; |
1584 | 515 | const auto pad_idx = index_check_const<pad_const>(i); |
1585 | 515 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; |
1586 | 515 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; |
1587 | | |
1588 | 515 | auto [iterate_byte_len, iterate_char_len] = |
1589 | 515 | simd::VStringFunctions::iterate_utf8_with_limit_length( |
1590 | 515 | (const char*)str_data, (const char*)str_data + str_len, len); |
1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len |
1592 | 515 | if (iterate_char_len == len) { |
1593 | 471 | buffer.resize(buffer_len + iterate_byte_len); |
1594 | 471 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); |
1595 | 471 | buffer_len += iterate_byte_len; |
1596 | 471 | res_offsets[i] = buffer_len; |
1597 | 471 | continue; |
1598 | 471 | } |
1599 | 44 | size_t pad_char_size; |
1600 | 44 | if constexpr (!pad_const) { |
1601 | 32 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, |
1602 | 32 | pad_len, pad_index); |
1603 | 32 | } else { |
1604 | 12 | pad_char_size = const_pad_char_size; |
1605 | 12 | } |
1606 | | |
1607 | | // make compatible with mysql. return empty string if pad is empty |
1608 | 44 | if (pad_char_size == 0) { |
1609 | 6 | res_offsets[i] = buffer_len; |
1610 | 6 | continue; |
1611 | 6 | } |
1612 | 38 | const size_t str_char_size = iterate_char_len; |
1613 | 38 | const size_t pad_times = (len - str_char_size) / pad_char_size; |
1614 | 38 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; |
1615 | 38 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; |
1616 | 38 | ColumnString::check_chars_length(buffer_len + new_capacity, i); |
1617 | 38 | buffer.resize(buffer_len + new_capacity); |
1618 | 38 | if constexpr (!Impl::is_lpad) { |
1619 | 19 | memcpy(buffer.data() + buffer_len, str_data, str_len); |
1620 | 19 | buffer_len += str_len; |
1621 | 19 | } |
1622 | | // Prepend chars of pad. |
1623 | 38 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, |
1624 | 38 | pad_times); |
1625 | 38 | buffer_len += pad_times * pad_len; |
1626 | | |
1627 | 38 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); |
1628 | 38 | buffer_len += pad_remainder_len; |
1629 | | |
1630 | 38 | if constexpr (Impl::is_lpad) { |
1631 | 19 | memcpy(buffer.data() + buffer_len, str_data, str_len); |
1632 | 19 | buffer_len += str_len; |
1633 | 19 | } |
1634 | 38 | res_offsets[i] = buffer_len; |
1635 | 38 | } |
1636 | 1.06k | } |
1637 | 870 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); |
1638 | 870 | } _ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 63 | size_t input_rows_count) const { | 1558 | 63 | std::vector<size_t> pad_index; | 1559 | 63 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | | if constexpr (pad_const) { | 1563 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | | } | 1566 | | | 1567 | 63 | fmt::memory_buffer buffer; | 1568 | 63 | buffer.resize(strcol_chars.size()); | 1569 | 63 | size_t buffer_len = 0; | 1570 | | | 1571 | 223 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | 160 | if constexpr (!pad_const) { | 1573 | 160 | pad_index.clear(); | 1574 | 160 | } | 1575 | 160 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 160 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 82 | null_map_data[i] = true; | 1579 | 82 | res_offsets[i] = buffer_len; | 1580 | 82 | } else { | 1581 | 78 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 78 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 78 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 78 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 78 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 78 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 78 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 78 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 78 | (const 
char*)str_data, (const char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 78 | if (iterate_char_len == len) { | 1593 | 68 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 68 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 68 | buffer_len += iterate_byte_len; | 1596 | 68 | res_offsets[i] = buffer_len; | 1597 | 68 | continue; | 1598 | 68 | } | 1599 | 10 | size_t pad_char_size; | 1600 | 10 | if constexpr (!pad_const) { | 1601 | 10 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | 10 | pad_len, pad_index); | 1603 | | } else { | 1604 | | pad_char_size = const_pad_char_size; | 1605 | | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 10 | if (pad_char_size == 0) { | 1609 | 3 | res_offsets[i] = buffer_len; | 1610 | 3 | continue; | 1611 | 3 | } | 1612 | 7 | const size_t str_char_size = iterate_char_len; | 1613 | 7 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 7 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 7 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 7 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 7 | buffer.resize(buffer_len + new_capacity); | 1618 | | if constexpr (!Impl::is_lpad) { | 1619 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | | buffer_len += str_len; | 1621 | | } | 1622 | | // Prepend chars of pad. 
| 1623 | 7 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 7 | pad_times); | 1625 | 7 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 7 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 7 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | 7 | if constexpr (Impl::is_lpad) { | 1631 | 7 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | 7 | buffer_len += str_len; | 1633 | 7 | } | 1634 | 7 | res_offsets[i] = buffer_len; | 1635 | 7 | } | 1636 | 160 | } | 1637 | 63 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 63 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | 62 | if constexpr (pad_const) { | 1563 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | 62 | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | | if constexpr (!pad_const) { | 1573 | | pad_index.clear(); | 1574 | | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, 
(const char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | | if constexpr (!pad_const) { | 1601 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | | pad_len, pad_index); | 1603 | 2 | } else { | 1604 | 2 | pad_char_size = const_pad_char_size; | 1605 | 2 | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | | if constexpr (!Impl::is_lpad) { | 1619 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | | buffer_len += str_len; | 1621 | | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | 2 | if constexpr (Impl::is_lpad) { | 1631 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | 2 | buffer_len += str_len; | 1633 | 2 | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | | if constexpr (pad_const) { | 1563 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | 62 | if constexpr (!pad_const) { | 1573 | 62 | pad_index.clear(); | 1574 | 62 | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, (const 
char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | 2 | if constexpr (!pad_const) { | 1601 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | 2 | pad_len, pad_index); | 1603 | | } else { | 1604 | | pad_char_size = const_pad_char_size; | 1605 | | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | | if constexpr (!Impl::is_lpad) { | 1619 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | | buffer_len += str_len; | 1621 | | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | 2 | if constexpr (Impl::is_lpad) { | 1631 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | 2 | buffer_len += str_len; | 1633 | 2 | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | 62 | if constexpr (pad_const) { | 1563 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | 62 | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | | if constexpr (!pad_const) { | 1573 | | pad_index.clear(); | 1574 | | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, 
(const char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | | if constexpr (!pad_const) { | 1601 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | | pad_len, pad_index); | 1603 | 2 | } else { | 1604 | 2 | pad_char_size = const_pad_char_size; | 1605 | 2 | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | | if constexpr (!Impl::is_lpad) { | 1619 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | | buffer_len += str_len; | 1621 | | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | 2 | if constexpr (Impl::is_lpad) { | 1631 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | 2 | buffer_len += str_len; | 1633 | 2 | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | | if constexpr (pad_const) { | 1563 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | 62 | if constexpr (!pad_const) { | 1573 | 62 | pad_index.clear(); | 1574 | 62 | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, (const 
char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | 2 | if constexpr (!pad_const) { | 1601 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | 2 | pad_len, pad_index); | 1603 | | } else { | 1604 | | pad_char_size = const_pad_char_size; | 1605 | | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | | if constexpr (!Impl::is_lpad) { | 1619 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | | buffer_len += str_len; | 1621 | | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | 2 | if constexpr (Impl::is_lpad) { | 1631 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | 2 | buffer_len += str_len; | 1633 | 2 | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | 62 | if constexpr (pad_const) { | 1563 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | 62 | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | | if constexpr (!pad_const) { | 1573 | | pad_index.clear(); | 1574 | | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, 
(const char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | | if constexpr (!pad_const) { | 1601 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | | pad_len, pad_index); | 1603 | 2 | } else { | 1604 | 2 | pad_char_size = const_pad_char_size; | 1605 | 2 | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | | if constexpr (!Impl::is_lpad) { | 1619 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | | buffer_len += str_len; | 1621 | | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | 2 | if constexpr (Impl::is_lpad) { | 1631 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | 2 | buffer_len += str_len; | 1633 | 2 | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | | if constexpr (pad_const) { | 1563 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | 62 | if constexpr (!pad_const) { | 1573 | 62 | pad_index.clear(); | 1574 | 62 | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, (const 
char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | 2 | if constexpr (!pad_const) { | 1601 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | 2 | pad_len, pad_index); | 1603 | | } else { | 1604 | | pad_char_size = const_pad_char_size; | 1605 | | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | | if constexpr (!Impl::is_lpad) { | 1619 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | | buffer_len += str_len; | 1621 | | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | 2 | if constexpr (Impl::is_lpad) { | 1631 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | 2 | buffer_len += str_len; | 1633 | 2 | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
Unexecuted instantiation: _ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m _ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 63 | size_t input_rows_count) const { | 1558 | 63 | std::vector<size_t> pad_index; | 1559 | 63 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | | if constexpr (pad_const) { | 1563 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | | } | 1566 | | | 1567 | 63 | fmt::memory_buffer buffer; | 1568 | 63 | buffer.resize(strcol_chars.size()); | 1569 | 63 | size_t buffer_len = 0; | 1570 | | | 1571 | 222 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | 159 | if constexpr (!pad_const) { | 1573 | 159 | pad_index.clear(); | 1574 | 159 | } | 1575 | 159 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 159 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 82 | null_map_data[i] = true; | 1579 | 82 | res_offsets[i] = buffer_len; | 1580 | 82 | } else { | 1581 | 77 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 77 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 77 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 77 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 77 | const int pad_len = padcol_offsets[pad_idx] - 
padcol_offsets[pad_idx - 1]; | 1586 | 77 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 77 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 77 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 77 | (const char*)str_data, (const char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 77 | if (iterate_char_len == len) { | 1593 | 67 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 67 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 67 | buffer_len += iterate_byte_len; | 1596 | 67 | res_offsets[i] = buffer_len; | 1597 | 67 | continue; | 1598 | 67 | } | 1599 | 10 | size_t pad_char_size; | 1600 | 10 | if constexpr (!pad_const) { | 1601 | 10 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | 10 | pad_len, pad_index); | 1603 | | } else { | 1604 | | pad_char_size = const_pad_char_size; | 1605 | | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 10 | if (pad_char_size == 0) { | 1609 | 3 | res_offsets[i] = buffer_len; | 1610 | 3 | continue; | 1611 | 3 | } | 1612 | 7 | const size_t str_char_size = iterate_char_len; | 1613 | 7 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 7 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 7 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 7 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 7 | buffer.resize(buffer_len + new_capacity); | 1618 | 7 | if constexpr (!Impl::is_lpad) { | 1619 | 7 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | 7 | buffer_len += str_len; | 1621 | 7 | } | 1622 | | // Prepend chars of pad. 
| 1623 | 7 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 7 | pad_times); | 1625 | 7 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 7 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 7 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | | if constexpr (Impl::is_lpad) { | 1631 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | | buffer_len += str_len; | 1633 | | } | 1634 | 7 | res_offsets[i] = buffer_len; | 1635 | 7 | } | 1636 | 159 | } | 1637 | 63 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 63 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | 62 | if constexpr (pad_const) { | 1563 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | 62 | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | | if constexpr (!pad_const) { | 1573 | | pad_index.clear(); | 1574 | | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, 
(const char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | | if constexpr (!pad_const) { | 1601 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | | pad_len, pad_index); | 1603 | 2 | } else { | 1604 | 2 | pad_char_size = const_pad_char_size; | 1605 | 2 | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | 2 | if constexpr (!Impl::is_lpad) { | 1619 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | 2 | buffer_len += str_len; | 1621 | 2 | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | | if constexpr (Impl::is_lpad) { | 1631 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | | buffer_len += str_len; | 1633 | | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | | if constexpr (pad_const) { | 1563 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | 62 | if constexpr (!pad_const) { | 1573 | 62 | pad_index.clear(); | 1574 | 62 | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, (const 
char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | 2 | if constexpr (!pad_const) { | 1601 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | 2 | pad_len, pad_index); | 1603 | | } else { | 1604 | | pad_char_size = const_pad_char_size; | 1605 | | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | 2 | if constexpr (!Impl::is_lpad) { | 1619 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | 2 | buffer_len += str_len; | 1621 | 2 | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | | if constexpr (Impl::is_lpad) { | 1631 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | | buffer_len += str_len; | 1633 | | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | 62 | if constexpr (pad_const) { | 1563 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | 62 | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | | if constexpr (!pad_const) { | 1573 | | pad_index.clear(); | 1574 | | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, 
(const char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | | if constexpr (!pad_const) { | 1601 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | | pad_len, pad_index); | 1603 | 2 | } else { | 1604 | 2 | pad_char_size = const_pad_char_size; | 1605 | 2 | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | 2 | if constexpr (!Impl::is_lpad) { | 1619 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | 2 | buffer_len += str_len; | 1621 | 2 | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | | if constexpr (Impl::is_lpad) { | 1631 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | | buffer_len += str_len; | 1633 | | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | | if constexpr (pad_const) { | 1563 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | 62 | if constexpr (!pad_const) { | 1573 | 62 | pad_index.clear(); | 1574 | 62 | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, (const 
char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | 2 | if constexpr (!pad_const) { | 1601 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | 2 | pad_len, pad_index); | 1603 | | } else { | 1604 | | pad_char_size = const_pad_char_size; | 1605 | | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | 2 | if constexpr (!Impl::is_lpad) { | 1619 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | 2 | buffer_len += str_len; | 1621 | 2 | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | | if constexpr (Impl::is_lpad) { | 1631 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | | buffer_len += str_len; | 1633 | | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | 62 | if constexpr (pad_const) { | 1563 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | 62 | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | | if constexpr (!pad_const) { | 1573 | | pad_index.clear(); | 1574 | | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, 
(const char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | | if constexpr (!pad_const) { | 1601 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | | pad_len, pad_index); | 1603 | 2 | } else { | 1604 | 2 | pad_char_size = const_pad_char_size; | 1605 | 2 | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | 2 | if constexpr (!Impl::is_lpad) { | 1619 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | 2 | buffer_len += str_len; | 1621 | 2 | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | | if constexpr (Impl::is_lpad) { | 1631 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | | buffer_len += str_len; | 1633 | | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 1557 | 62 | size_t input_rows_count) const { | 1558 | 62 | std::vector<size_t> pad_index; | 1559 | 62 | size_t const_pad_char_size = 0; | 1560 | | // If pad_const = true, initialize pad_index only once. | 1561 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 1562 | | if constexpr (pad_const) { | 1563 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 1564 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 1565 | | } | 1566 | | | 1567 | 62 | fmt::memory_buffer buffer; | 1568 | 62 | buffer.resize(strcol_chars.size()); | 1569 | 62 | size_t buffer_len = 0; | 1570 | | | 1571 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 1572 | 62 | if constexpr (!pad_const) { | 1573 | 62 | pad_index.clear(); | 1574 | 62 | } | 1575 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 1576 | 62 | if (len < 0) { | 1577 | | // return NULL when input length is invalid number | 1578 | 32 | null_map_data[i] = true; | 1579 | 32 | res_offsets[i] = buffer_len; | 1580 | 32 | } else { | 1581 | 30 | const auto str_idx = index_check_const<str_const>(i); | 1582 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 1583 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 1584 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 1585 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 1586 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 1587 | | | 1588 | 30 | auto [iterate_byte_len, iterate_char_len] = | 1589 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 1590 | 30 | (const char*)str_data, (const 
char*)str_data + str_len, len); | 1591 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 1592 | 30 | if (iterate_char_len == len) { | 1593 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 1594 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 1595 | 28 | buffer_len += iterate_byte_len; | 1596 | 28 | res_offsets[i] = buffer_len; | 1597 | 28 | continue; | 1598 | 28 | } | 1599 | 2 | size_t pad_char_size; | 1600 | 2 | if constexpr (!pad_const) { | 1601 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 1602 | 2 | pad_len, pad_index); | 1603 | | } else { | 1604 | | pad_char_size = const_pad_char_size; | 1605 | | } | 1606 | | | 1607 | | // make compatible with mysql. return empty string if pad is empty | 1608 | 2 | if (pad_char_size == 0) { | 1609 | 0 | res_offsets[i] = buffer_len; | 1610 | 0 | continue; | 1611 | 0 | } | 1612 | 2 | const size_t str_char_size = iterate_char_len; | 1613 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 1614 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 1615 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 1616 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 1617 | 2 | buffer.resize(buffer_len + new_capacity); | 1618 | 2 | if constexpr (!Impl::is_lpad) { | 1619 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1620 | 2 | buffer_len += str_len; | 1621 | 2 | } | 1622 | | // Prepend chars of pad. 
| 1623 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 1624 | 2 | pad_times); | 1625 | 2 | buffer_len += pad_times * pad_len; | 1626 | | | 1627 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 1628 | 2 | buffer_len += pad_remainder_len; | 1629 | | | 1630 | | if constexpr (Impl::is_lpad) { | 1631 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 1632 | | buffer_len += str_len; | 1633 | | } | 1634 | 2 | res_offsets[i] = buffer_len; | 1635 | 2 | } | 1636 | 62 | } | 1637 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 1638 | 62 | } |
Unexecuted instantiation: _ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m |
1639 | | }; |
1640 | | |
1641 | | template <typename Impl> |
1642 | | class FunctionStringFormatRound : public IFunction { |
1643 | | public: |
1644 | | static constexpr auto name = "format_round"; |
1645 | 79 | static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }_ZN5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE6createEv Line | Count | Source | 1645 | 9 | static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); } |
_ZN5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE6createEv Line | Count | Source | 1645 | 9 | static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); } |
_ZN5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE6createEv Line | Count | Source | 1645 | 9 | static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); } |
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE6createEv Line | Count | Source | 1645 | 9 | static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); } |
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE6createEv Line | Count | Source | 1645 | 8 | static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); } |
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE6createEv Line | Count | Source | 1645 | 19 | static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); } |
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE6createEv Line | Count | Source | 1645 | 8 | static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); } |
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE6createEv Line | Count | Source | 1645 | 8 | static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); } |
|
1646 | 8 | String get_name() const override { return name; }_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE8get_nameB5cxx11Ev Line | Count | Source | 1646 | 1 | String get_name() const override { return name; } |
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE8get_nameB5cxx11Ev Line | Count | Source | 1646 | 1 | String get_name() const override { return name; } |
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE8get_nameB5cxx11Ev Line | Count | Source | 1646 | 1 | String get_name() const override { return name; } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE8get_nameB5cxx11Ev Line | Count | Source | 1646 | 1 | String get_name() const override { return name; } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE8get_nameB5cxx11Ev Line | Count | Source | 1646 | 1 | String get_name() const override { return name; } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE8get_nameB5cxx11Ev Line | Count | Source | 1646 | 1 | String get_name() const override { return name; } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE8get_nameB5cxx11Ev Line | Count | Source | 1646 | 1 | String get_name() const override { return name; } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE8get_nameB5cxx11Ev Line | Count | Source | 1646 | 1 | String get_name() const override { return name; } |
|
1647 | | |
1648 | 15 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1649 | 15 | if (arguments.size() != 2) { |
1650 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, |
1651 | 0 | "Function {} requires exactly 2 argument", name); |
1652 | 0 | } |
1653 | 15 | return std::make_shared<DataTypeString>(); |
1654 | 15 | } _ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1648 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1649 | 1 | if (arguments.size() != 2) { | 1650 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 1651 | 0 | "Function {} requires exactly 2 argument", name); | 1652 | 0 | } | 1653 | 1 | return std::make_shared<DataTypeString>(); | 1654 | 1 | } |
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1648 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1649 | 1 | if (arguments.size() != 2) { | 1650 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 1651 | 0 | "Function {} requires exactly 2 argument", name); | 1652 | 0 | } | 1653 | 1 | return std::make_shared<DataTypeString>(); | 1654 | 1 | } |
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 1648 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1649 | 1 | if (arguments.size() != 2) { | 1650 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 1651 | 0 | "Function {} requires exactly 2 argument", name); | 1652 | 0 | } | 1653 | 1 | return std::make_shared<DataTypeString>(); | 1654 | 1 | } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 1648 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1649 | 1 | if (arguments.size() != 2) { | 1650 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 1651 | 0 | "Function {} requires exactly 2 argument", name); | 1652 | 0 | } | 1653 | 1 | return std::make_shared<DataTypeString>(); | 1654 | 1 | } |
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 1648 | 11 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 1649 | 11 | if (arguments.size() != 2) { | 1650 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 1651 | 0 | "Function {} requires exactly 2 argument", name); | 1652 | 0 | } | 1653 | 11 | return std::make_shared<DataTypeString>(); | 1654 | 11 | } |
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE |
1655 | 56 | DataTypes get_variadic_argument_types_impl() const override { |
1656 | 56 | return Impl::get_variadic_argument_types(); |
1657 | 56 | } _ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE32get_variadic_argument_types_implEv Line | Count | Source | 1655 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 1656 | 7 | return Impl::get_variadic_argument_types(); | 1657 | 7 | } |
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE32get_variadic_argument_types_implEv Line | Count | Source | 1655 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 1656 | 7 | return Impl::get_variadic_argument_types(); | 1657 | 7 | } |
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE32get_variadic_argument_types_implEv Line | Count | Source | 1655 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 1656 | 7 | return Impl::get_variadic_argument_types(); | 1657 | 7 | } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE32get_variadic_argument_types_implEv Line | Count | Source | 1655 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 1656 | 7 | return Impl::get_variadic_argument_types(); | 1657 | 7 | } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE32get_variadic_argument_types_implEv Line | Count | Source | 1655 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 1656 | 7 | return Impl::get_variadic_argument_types(); | 1657 | 7 | } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE32get_variadic_argument_types_implEv Line | Count | Source | 1655 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 1656 | 7 | return Impl::get_variadic_argument_types(); | 1657 | 7 | } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE32get_variadic_argument_types_implEv Line | Count | Source | 1655 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 1656 | 7 | return Impl::get_variadic_argument_types(); | 1657 | 7 | } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE32get_variadic_argument_types_implEv Line | Count | Source | 1655 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 1656 | 7 | return Impl::get_variadic_argument_types(); | 1657 | 7 | } |
|
1658 | 15 | size_t get_number_of_arguments() const override { return 2; }_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE23get_number_of_argumentsEv Line | Count | Source | 1658 | 1 | size_t get_number_of_arguments() const override { return 2; } |
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE23get_number_of_argumentsEv Line | Count | Source | 1658 | 1 | size_t get_number_of_arguments() const override { return 2; } |
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE23get_number_of_argumentsEv Line | Count | Source | 1658 | 1 | size_t get_number_of_arguments() const override { return 2; } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE23get_number_of_argumentsEv Line | Count | Source | 1658 | 1 | size_t get_number_of_arguments() const override { return 2; } |
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE23get_number_of_argumentsEv _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE23get_number_of_argumentsEv Line | Count | Source | 1658 | 11 | size_t get_number_of_arguments() const override { return 2; } |
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE23get_number_of_argumentsEv |
1659 | | |
1660 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1661 | 12 | uint32_t result, size_t input_rows_count) const override { |
1662 | 12 | auto res_column = ColumnString::create(); |
1663 | 12 | ColumnPtr argument_column = |
1664 | 12 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
1665 | 12 | ColumnPtr argument_column_2; |
1666 | 12 | bool is_const; |
1667 | 12 | std::tie(argument_column_2, is_const) = |
1668 | 12 | unpack_if_const(block.get_by_position(arguments[1]).column); |
1669 | 12 | auto* result_column = assert_cast<ColumnString*>(res_column.get()); |
1670 | | |
1671 | 12 | if (is_const) { |
1672 | 2 | RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column, |
1673 | 2 | argument_column_2, input_rows_count)); |
1674 | 10 | } else { |
1675 | 10 | RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column, |
1676 | 10 | argument_column_2, input_rows_count)); |
1677 | 10 | } |
1678 | | |
1679 | 12 | block.replace_by_position(result, std::move(res_column)); |
1680 | 12 | return Status::OK(); |
1681 | 12 | } _ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1661 | 1 | uint32_t result, size_t input_rows_count) const override { | 1662 | 1 | auto res_column = ColumnString::create(); | 1663 | 1 | ColumnPtr argument_column = | 1664 | 1 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); | 1665 | 1 | ColumnPtr argument_column_2; | 1666 | 1 | bool is_const; | 1667 | 1 | std::tie(argument_column_2, is_const) = | 1668 | 1 | unpack_if_const(block.get_by_position(arguments[1]).column); | 1669 | 1 | auto* result_column = assert_cast<ColumnString*>(res_column.get()); | 1670 | | | 1671 | 1 | if (is_const) { | 1672 | 0 | RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column, | 1673 | 0 | argument_column_2, input_rows_count)); | 1674 | 1 | } else { | 1675 | 1 | RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column, | 1676 | 1 | argument_column_2, input_rows_count)); | 1677 | 1 | } | 1678 | | | 1679 | 1 | block.replace_by_position(result, std::move(res_column)); | 1680 | 1 | return Status::OK(); | 1681 | 1 | } |
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1661 | 1 | uint32_t result, size_t input_rows_count) const override { | 1662 | 1 | auto res_column = ColumnString::create(); | 1663 | 1 | ColumnPtr argument_column = | 1664 | 1 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); | 1665 | 1 | ColumnPtr argument_column_2; | 1666 | 1 | bool is_const; | 1667 | 1 | std::tie(argument_column_2, is_const) = | 1668 | 1 | unpack_if_const(block.get_by_position(arguments[1]).column); | 1669 | 1 | auto* result_column = assert_cast<ColumnString*>(res_column.get()); | 1670 | | | 1671 | 1 | if (is_const) { | 1672 | 0 | RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column, | 1673 | 0 | argument_column_2, input_rows_count)); | 1674 | 1 | } else { | 1675 | 1 | RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column, | 1676 | 1 | argument_column_2, input_rows_count)); | 1677 | 1 | } | 1678 | | | 1679 | 1 | block.replace_by_position(result, std::move(res_column)); | 1680 | 1 | return Status::OK(); | 1681 | 1 | } |
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1661 | 1 | uint32_t result, size_t input_rows_count) const override { | 1662 | 1 | auto res_column = ColumnString::create(); | 1663 | 1 | ColumnPtr argument_column = | 1664 | 1 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); | 1665 | 1 | ColumnPtr argument_column_2; | 1666 | 1 | bool is_const; | 1667 | 1 | std::tie(argument_column_2, is_const) = | 1668 | 1 | unpack_if_const(block.get_by_position(arguments[1]).column); | 1669 | 1 | auto* result_column = assert_cast<ColumnString*>(res_column.get()); | 1670 | | | 1671 | 1 | if (is_const) { | 1672 | 0 | RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column, | 1673 | 0 | argument_column_2, input_rows_count)); | 1674 | 1 | } else { | 1675 | 1 | RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column, | 1676 | 1 | argument_column_2, input_rows_count)); | 1677 | 1 | } | 1678 | | | 1679 | 1 | block.replace_by_position(result, std::move(res_column)); | 1680 | 1 | return Status::OK(); | 1681 | 1 | } |
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1661 | 1 | uint32_t result, size_t input_rows_count) const override { | 1662 | 1 | auto res_column = ColumnString::create(); | 1663 | 1 | ColumnPtr argument_column = | 1664 | 1 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); | 1665 | 1 | ColumnPtr argument_column_2; | 1666 | 1 | bool is_const; | 1667 | 1 | std::tie(argument_column_2, is_const) = | 1668 | 1 | unpack_if_const(block.get_by_position(arguments[1]).column); | 1669 | 1 | auto* result_column = assert_cast<ColumnString*>(res_column.get()); | 1670 | | | 1671 | 1 | if (is_const) { | 1672 | 0 | RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column, | 1673 | 0 | argument_column_2, input_rows_count)); | 1674 | 1 | } else { | 1675 | 1 | RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column, | 1676 | 1 | argument_column_2, input_rows_count)); | 1677 | 1 | } | 1678 | | | 1679 | 1 | block.replace_by_position(result, std::move(res_column)); | 1680 | 1 | return Status::OK(); | 1681 | 1 | } |
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 1661 | 8 | uint32_t result, size_t input_rows_count) const override { | 1662 | 8 | auto res_column = ColumnString::create(); | 1663 | 8 | ColumnPtr argument_column = | 1664 | 8 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); | 1665 | 8 | ColumnPtr argument_column_2; | 1666 | 8 | bool is_const; | 1667 | 8 | std::tie(argument_column_2, is_const) = | 1668 | 8 | unpack_if_const(block.get_by_position(arguments[1]).column); | 1669 | 8 | auto* result_column = assert_cast<ColumnString*>(res_column.get()); | 1670 | | | 1671 | 8 | if (is_const) { | 1672 | 2 | RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column, | 1673 | 2 | argument_column_2, input_rows_count)); | 1674 | 6 | } else { | 1675 | 6 | RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column, | 1676 | 6 | argument_column_2, input_rows_count)); | 1677 | 6 | } | 1678 | | | 1679 | 8 | block.replace_by_position(result, std::move(res_column)); | 1680 | 8 | return Status::OK(); | 1681 | 8 | } |
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
1682 | | }; |
1683 | | |
1684 | | class FunctionSplitPart : public IFunction { |
1685 | | public: |
1686 | | static constexpr auto name = "split_part"; |
1687 | 8 | static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); } |
1688 | 1 | String get_name() const override { return name; } |
1689 | 0 | size_t get_number_of_arguments() const override { return 3; } |
1690 | | |
1691 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1692 | 0 | return make_nullable(std::make_shared<DataTypeString>()); |
1693 | 0 | } |
1694 | | |
1695 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1696 | 0 | uint32_t result, size_t input_rows_count) const override { |
1697 | 0 | DCHECK_EQ(arguments.size(), 3); |
1698 | |
|
1699 | 0 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
1700 | | // Create a zero column to simply implement |
1701 | 0 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); |
1702 | 0 | auto res = ColumnString::create(); |
1703 | |
|
1704 | 0 | auto& null_map_data = null_map->get_data(); |
1705 | 0 | auto& res_offsets = res->get_offsets(); |
1706 | 0 | auto& res_chars = res->get_chars(); |
1707 | 0 | res_offsets.resize(input_rows_count); |
1708 | |
|
1709 | 0 | const size_t argument_size = arguments.size(); |
1710 | 0 | std::vector<ColumnPtr> argument_columns(argument_size); |
1711 | 0 | for (size_t i = 0; i < argument_size; ++i) { |
1712 | 0 | argument_columns[i] = |
1713 | 0 | block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); |
1714 | 0 | if (const auto* nullable = |
1715 | 0 | check_and_get_column<const ColumnNullable>(*argument_columns[i])) { |
1716 | | // Danger: Here must dispose the null map data first! Because |
1717 | | // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem |
1718 | | // of column nullable mem of null map |
1719 | 0 | VectorizedUtils::update_null_map(null_map->get_data(), |
1720 | 0 | nullable->get_null_map_data()); |
1721 | 0 | argument_columns[i] = nullable->get_nested_column_ptr(); |
1722 | 0 | } |
1723 | 0 | } |
1724 | |
|
1725 | 0 | const auto* str_col = assert_cast<const ColumnString*>(argument_columns[0].get()); |
1726 | |
|
1727 | 0 | const auto* delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get()); |
1728 | |
|
1729 | 0 | const auto* part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
1730 | 0 | const auto& part_num_col_data = part_num_col->get_data(); |
1731 | |
|
1732 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
1733 | 0 | if (part_num_col_data[i] == 0) { |
1734 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
1735 | 0 | continue; |
1736 | 0 | } |
1737 | | |
1738 | 0 | auto delimiter = delimiter_col->get_data_at(i); |
1739 | 0 | auto delimiter_str = delimiter_col->get_data_at(i).to_string(); |
1740 | 0 | auto part_number = part_num_col_data[i]; |
1741 | 0 | auto str = str_col->get_data_at(i); |
1742 | 0 | if (delimiter.size == 0) { |
1743 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
1744 | 0 | continue; |
1745 | 0 | } |
1746 | | |
1747 | 0 | if (part_number > 0) { |
1748 | 0 | if (delimiter.size == 1) { |
1749 | | // If delimiter is a char, use memchr to split |
1750 | 0 | int32_t pre_offset = -1; |
1751 | 0 | int32_t offset = -1; |
1752 | 0 | int32_t num = 0; |
1753 | 0 | while (num < part_number) { |
1754 | 0 | pre_offset = offset; |
1755 | 0 | size_t n = str.size - offset - 1; |
1756 | 0 | const char* pos = reinterpret_cast<const char*>( |
1757 | 0 | memchr(str.data + offset + 1, delimiter_str[0], n)); |
1758 | 0 | if (pos != nullptr) { |
1759 | 0 | offset = pos - str.data; |
1760 | 0 | num++; |
1761 | 0 | } else { |
1762 | 0 | offset = str.size; |
1763 | 0 | num = (num == 0) ? 0 : num + 1; |
1764 | 0 | break; |
1765 | 0 | } |
1766 | 0 | } |
1767 | |
|
1768 | 0 | if (num == part_number) { |
1769 | 0 | StringOP::push_value_string( |
1770 | 0 | std::string_view { |
1771 | 0 | reinterpret_cast<const char*>(str.data + pre_offset + 1), |
1772 | 0 | (size_t)offset - pre_offset - 1}, |
1773 | 0 | i, res_chars, res_offsets); |
1774 | 0 | } else { |
1775 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
1776 | 0 | } |
1777 | 0 | } else { |
1778 | | // If delimiter is a string, use memmem to split |
1779 | 0 | int32_t pre_offset = -delimiter.size; |
1780 | 0 | int32_t offset = -delimiter.size; |
1781 | 0 | int32_t num = 0; |
1782 | 0 | while (num < part_number) { |
1783 | 0 | pre_offset = offset; |
1784 | 0 | size_t n = str.size - offset - delimiter.size; |
1785 | 0 | char* pos = |
1786 | 0 | reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size, |
1787 | 0 | n, delimiter.data, delimiter.size)); |
1788 | 0 | if (pos != nullptr) { |
1789 | 0 | offset = pos - str.data; |
1790 | 0 | num++; |
1791 | 0 | } else { |
1792 | 0 | offset = str.size; |
1793 | 0 | num = (num == 0) ? 0 : num + 1; |
1794 | 0 | break; |
1795 | 0 | } |
1796 | 0 | } |
1797 | |
|
1798 | 0 | if (num == part_number) { |
1799 | 0 | StringOP::push_value_string( |
1800 | 0 | std::string_view {reinterpret_cast<const char*>( |
1801 | 0 | str.data + pre_offset + delimiter.size), |
1802 | 0 | (size_t)offset - pre_offset - delimiter.size}, |
1803 | 0 | i, res_chars, res_offsets); |
1804 | 0 | } else { |
1805 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
1806 | 0 | } |
1807 | 0 | } |
1808 | 0 | } else { |
1809 | 0 | part_number = -part_number; |
1810 | 0 | auto str_str = str.to_string(); |
1811 | 0 | int32_t offset = str.size; |
1812 | 0 | int32_t pre_offset = offset; |
1813 | 0 | int32_t num = 0; |
1814 | 0 | auto substr = str_str; |
1815 | 0 | while (num <= part_number && offset >= 0) { |
1816 | 0 | offset = (int)substr.rfind(delimiter, offset); |
1817 | 0 | if (offset != -1) { |
1818 | 0 | if (++num == part_number) { |
1819 | 0 | break; |
1820 | 0 | } |
1821 | 0 | pre_offset = offset; |
1822 | 0 | offset = offset - 1; |
1823 | 0 | substr = str_str.substr(0, pre_offset); |
1824 | 0 | } else { |
1825 | 0 | break; |
1826 | 0 | } |
1827 | 0 | } |
1828 | 0 | num = (offset == -1 && num != 0) ? num + 1 : num; |
1829 | |
|
1830 | 0 | if (num == part_number) { |
1831 | 0 | if (offset == -1) { |
1832 | 0 | StringOP::push_value_string( |
1833 | 0 | std::string_view {reinterpret_cast<const char*>(str.data), |
1834 | 0 | (size_t)pre_offset}, |
1835 | 0 | i, res_chars, res_offsets); |
1836 | 0 | } else { |
1837 | 0 | StringOP::push_value_string( |
1838 | 0 | std::string_view {str_str.substr( |
1839 | 0 | offset + delimiter.size, |
1840 | 0 | (size_t)pre_offset - offset - delimiter.size)}, |
1841 | 0 | i, res_chars, res_offsets); |
1842 | 0 | } |
1843 | 0 | } else { |
1844 | 0 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
1845 | 0 | } |
1846 | 0 | } |
1847 | 0 | } |
1848 | |
|
1849 | 0 | block.get_by_position(result).column = |
1850 | 0 | ColumnNullable::create(std::move(res), std::move(null_map)); |
1851 | 0 | return Status::OK(); |
1852 | 0 | } |
1853 | | }; |
1854 | | |
1855 | | class FunctionSubstringIndex : public IFunction { |
1856 | | public: |
1857 | | static constexpr auto name = "substring_index"; |
1858 | 8 | static FunctionPtr create() { return std::make_shared<FunctionSubstringIndex>(); } |
1859 | 1 | String get_name() const override { return name; } |
1860 | 0 | size_t get_number_of_arguments() const override { return 3; } |
1861 | | |
1862 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
1863 | 0 | return std::make_shared<DataTypeString>(); |
1864 | 0 | } |
1865 | | |
1866 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
1867 | 0 | uint32_t result, size_t input_rows_count) const override { |
1868 | 0 | DCHECK_EQ(arguments.size(), 3); |
1869 | | |
1870 | | // Create a zero column to simply implement |
1871 | 0 | auto res = ColumnString::create(); |
1872 | |
|
1873 | 0 | auto& res_offsets = res->get_offsets(); |
1874 | 0 | auto& res_chars = res->get_chars(); |
1875 | 0 | res_offsets.resize(input_rows_count); |
1876 | 0 | ColumnPtr content_column; |
1877 | 0 | bool content_const = false; |
1878 | 0 | std::tie(content_column, content_const) = |
1879 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
1880 | |
|
1881 | 0 | const auto* str_col = assert_cast<const ColumnString*>(content_column.get()); |
1882 | | |
1883 | | // Handle both constant and non-constant delimiter parameters |
1884 | 0 | ColumnPtr delimiter_column_ptr; |
1885 | 0 | bool delimiter_const = false; |
1886 | 0 | std::tie(delimiter_column_ptr, delimiter_const) = |
1887 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
1888 | 0 | const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get()); |
1889 | |
|
1890 | 0 | ColumnPtr part_num_column_ptr; |
1891 | 0 | bool part_num_const = false; |
1892 | 0 | std::tie(part_num_column_ptr, part_num_const) = |
1893 | 0 | unpack_if_const(block.get_by_position(arguments[2]).column); |
1894 | 0 | const ColumnInt32* part_num_col = |
1895 | 0 | assert_cast<const ColumnInt32*>(part_num_column_ptr.get()); |
1896 | | |
1897 | | // For constant multi-character delimiters, create StringRef and StringSearch only once |
1898 | 0 | std::optional<StringRef> const_delimiter_ref; |
1899 | 0 | std::optional<StringSearch> const_search; |
1900 | 0 | if (delimiter_const && delimiter_col->get_data_at(0).size > 1) { |
1901 | 0 | const_delimiter_ref.emplace(delimiter_col->get_data_at(0)); |
1902 | 0 | const_search.emplace(&const_delimiter_ref.value()); |
1903 | 0 | } |
1904 | |
|
1905 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
1906 | 0 | auto str = str_col->get_data_at(content_const ? 0 : i); |
1907 | 0 | auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i); |
1908 | 0 | int32_t delimiter_size = delimiter.size; |
1909 | |
|
1910 | 0 | auto part_number = part_num_col->get_element(part_num_const ? 0 : i); |
1911 | |
|
1912 | 0 | if (part_number == 0 || delimiter_size == 0) { |
1913 | 0 | StringOP::push_empty_string(i, res_chars, res_offsets); |
1914 | 0 | continue; |
1915 | 0 | } |
1916 | | |
1917 | 0 | if (part_number > 0) { |
1918 | 0 | if (delimiter_size == 1) { |
1919 | 0 | int32_t offset = -1; |
1920 | 0 | int32_t num = 0; |
1921 | 0 | while (num < part_number) { |
1922 | 0 | size_t n = str.size - offset - 1; |
1923 | 0 | const char* pos = reinterpret_cast<const char*>( |
1924 | 0 | memchr(str.data + offset + 1, delimiter.data[0], n)); |
1925 | 0 | if (pos != nullptr) { |
1926 | 0 | offset = pos - str.data; |
1927 | 0 | num++; |
1928 | 0 | } else { |
1929 | 0 | offset = str.size; |
1930 | 0 | num = (num == 0) ? 0 : num + 1; |
1931 | 0 | break; |
1932 | 0 | } |
1933 | 0 | } |
1934 | |
|
1935 | 0 | if (num == part_number) { |
1936 | 0 | StringOP::push_value_string( |
1937 | 0 | std::string_view {reinterpret_cast<const char*>(str.data), |
1938 | 0 | (size_t)offset}, |
1939 | 0 | i, res_chars, res_offsets); |
1940 | 0 | } else { |
1941 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
1942 | 0 | res_chars, res_offsets); |
1943 | 0 | } |
1944 | 0 | } else { |
1945 | | // For multi-character delimiters |
1946 | | // Use pre-created StringRef and StringSearch for constant delimiters |
1947 | 0 | StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value() |
1948 | 0 | : StringRef(delimiter); |
1949 | 0 | const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr; |
1950 | 0 | StringSearch local_search(&delimiter_ref); |
1951 | 0 | if (!search_ptr) { |
1952 | 0 | search_ptr = &local_search; |
1953 | 0 | } |
1954 | |
|
1955 | 0 | int32_t offset = -delimiter_size; |
1956 | 0 | int32_t num = 0; |
1957 | 0 | while (num < part_number) { |
1958 | 0 | size_t n = str.size - offset - delimiter_size; |
1959 | | // search first match delimter_ref index from src string among str_offset to end |
1960 | 0 | const char* pos = search_ptr->search(str.data + offset + delimiter_size, n); |
1961 | 0 | if (pos < str.data + str.size) { |
1962 | 0 | offset = pos - str.data; |
1963 | 0 | num++; |
1964 | 0 | } else { |
1965 | 0 | offset = str.size; |
1966 | 0 | num = (num == 0) ? 0 : num + 1; |
1967 | 0 | break; |
1968 | 0 | } |
1969 | 0 | } |
1970 | |
|
1971 | 0 | if (num == part_number) { |
1972 | 0 | StringOP::push_value_string( |
1973 | 0 | std::string_view {reinterpret_cast<const char*>(str.data), |
1974 | 0 | (size_t)offset}, |
1975 | 0 | i, res_chars, res_offsets); |
1976 | 0 | } else { |
1977 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
1978 | 0 | res_chars, res_offsets); |
1979 | 0 | } |
1980 | 0 | } |
1981 | 0 | } else { |
1982 | 0 | int neg_part_number = -part_number; |
1983 | 0 | auto str_str = str.to_string(); |
1984 | 0 | int32_t offset = str.size; |
1985 | 0 | int32_t pre_offset = offset; |
1986 | 0 | int32_t num = 0; |
1987 | 0 | auto substr = str_str; |
1988 | | |
1989 | | // Use pre-created StringRef for constant delimiters |
1990 | 0 | StringRef delimiter_str = |
1991 | 0 | const_delimiter_ref |
1992 | 0 | ? const_delimiter_ref.value() |
1993 | 0 | : StringRef(reinterpret_cast<const char*>(delimiter.data), |
1994 | 0 | delimiter.size); |
1995 | |
|
1996 | 0 | while (num <= neg_part_number && offset >= 0) { |
1997 | 0 | offset = (int)substr.rfind(delimiter_str, offset); |
1998 | 0 | if (offset != -1) { |
1999 | 0 | if (++num == neg_part_number) { |
2000 | 0 | break; |
2001 | 0 | } |
2002 | 0 | pre_offset = offset; |
2003 | 0 | offset = offset - 1; |
2004 | 0 | substr = str_str.substr(0, pre_offset); |
2005 | 0 | } else { |
2006 | 0 | break; |
2007 | 0 | } |
2008 | 0 | } |
2009 | 0 | num = (offset == -1 && num != 0) ? num + 1 : num; |
2010 | |
|
2011 | 0 | if (num == neg_part_number) { |
2012 | 0 | if (offset == -1) { |
2013 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, |
2014 | 0 | res_chars, res_offsets); |
2015 | 0 | } else { |
2016 | 0 | StringOP::push_value_string( |
2017 | 0 | std::string_view {str.data + offset + delimiter_size, |
2018 | 0 | str.size - offset - delimiter_size}, |
2019 | 0 | i, res_chars, res_offsets); |
2020 | 0 | } |
2021 | 0 | } else { |
2022 | 0 | StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars, |
2023 | 0 | res_offsets); |
2024 | 0 | } |
2025 | 0 | } |
2026 | 0 | } |
2027 | |
|
2028 | 0 | block.get_by_position(result).column = std::move(res); |
2029 | 0 | return Status::OK(); |
2030 | 0 | } |
2031 | | }; |
2032 | | |
2033 | | class FunctionSplitByString : public IFunction { |
2034 | | public: |
2035 | | static constexpr auto name = "split_by_string"; |
2036 | | |
2037 | 8 | static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); } |
2038 | | using NullMapType = PaddedPODArray<UInt8>; |
2039 | | |
2040 | 1 | String get_name() const override { return name; } |
2041 | | |
2042 | 1 | bool is_variadic() const override { return false; } |
2043 | | |
2044 | 0 | size_t get_number_of_arguments() const override { return 2; } |
2045 | | |
2046 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2047 | 0 | DCHECK(is_string_type(arguments[0]->get_primitive_type())) |
2048 | 0 | << "first argument for function: " << name << " should be string" |
2049 | 0 | << " and arguments[0] is " << arguments[0]->get_name(); |
2050 | 0 | DCHECK(is_string_type(arguments[1]->get_primitive_type())) |
2051 | 0 | << "second argument for function: " << name << " should be string" |
2052 | 0 | << " and arguments[1] is " << arguments[1]->get_name(); |
2053 | 0 | return std::make_shared<DataTypeArray>(make_nullable(arguments[0])); |
2054 | 0 | } |
2055 | | |
2056 | | Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, |
2057 | 0 | uint32_t result, size_t input_rows_count) const override { |
2058 | 0 | DCHECK_EQ(arguments.size(), 2); |
2059 | |
|
2060 | 0 | const auto& [src_column, left_const] = |
2061 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
2062 | 0 | const auto& [right_column, right_const] = |
2063 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
2064 | |
|
2065 | 0 | DataTypePtr right_column_type = block.get_by_position(arguments[1]).type; |
2066 | 0 | DataTypePtr src_column_type = block.get_by_position(arguments[0]).type; |
2067 | 0 | auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(), |
2068 | 0 | ColumnArray::ColumnOffsets::create()); |
2069 | |
|
2070 | 0 | dest_column_ptr->resize(0); |
2071 | 0 | auto& dest_offsets = dest_column_ptr->get_offsets(); |
2072 | |
|
2073 | 0 | auto& dest_nullable_col = assert_cast<ColumnNullable&>(dest_column_ptr->get_data()); |
2074 | 0 | auto* dest_nested_column = dest_nullable_col.get_nested_column_ptr().get(); |
2075 | |
|
2076 | 0 | const auto* col_str = assert_cast<const ColumnString*>(src_column.get()); |
2077 | |
|
2078 | 0 | const auto* col_delimiter = assert_cast<const ColumnString*>(right_column.get()); |
2079 | |
|
2080 | 0 | std::visit( |
2081 | 0 | [&](auto src_const, auto delimiter_const) { |
2082 | 0 | _execute<src_const, delimiter_const>(*col_str, *col_delimiter, |
2083 | 0 | *dest_nested_column, dest_offsets, |
2084 | 0 | input_rows_count); |
2085 | 0 | }, Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESF_EEDaSA_SB_ Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESE_IbLb1EEEEDaSA_SB_ Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESE_IbLb0EEEEDaSA_SB_ Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESF_EEDaSA_SB_ |
2086 | 0 | make_bool_variant(left_const), make_bool_variant(right_const)); |
2087 | | |
2088 | | // all elements in dest_nested_column are not null |
2089 | 0 | dest_nullable_col.get_null_map_column().get_data().resize_fill(dest_nested_column->size(), |
2090 | 0 | false); |
2091 | 0 | block.replace_by_position(result, std::move(dest_column_ptr)); |
2092 | |
|
2093 | 0 | return Status::OK(); |
2094 | 0 | } |
2095 | | |
2096 | | private: |
2097 | | template <bool src_const, bool delimiter_const> |
2098 | | void _execute(const ColumnString& src_column_string, const ColumnString& delimiter_column, |
2099 | | IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets, |
2100 | 0 | size_t size) const { |
2101 | 0 | auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column); |
2102 | 0 | ColumnString::Chars& column_string_chars = dest_column_string.get_chars(); |
2103 | 0 | ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets(); |
2104 | 0 | column_string_chars.reserve(0); |
2105 | |
|
2106 | 0 | ColumnArray::Offset64 string_pos = 0; |
2107 | 0 | ColumnArray::Offset64 dest_pos = 0; |
2108 | |
|
2109 | 0 | StringSearch search; |
2110 | 0 | StringRef delimiter_ref_for_search; |
2111 | |
|
2112 | 0 | if constexpr (delimiter_const) { |
2113 | 0 | delimiter_ref_for_search = delimiter_column.get_data_at(0); |
2114 | 0 | search.set_pattern(&delimiter_ref_for_search); |
2115 | 0 | } |
2116 | |
|
2117 | 0 | for (size_t i = 0; i < size; i++) { |
2118 | 0 | const StringRef str_ref = |
2119 | 0 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
2120 | 0 | const StringRef delimiter_ref = |
2121 | 0 | delimiter_column.get_data_at(index_check_const<delimiter_const>(i)); |
2122 | |
|
2123 | 0 | if (str_ref.size == 0) { |
2124 | 0 | dest_offsets.push_back(dest_pos); |
2125 | 0 | continue; |
2126 | 0 | } |
2127 | 0 | if (delimiter_ref.size == 0) { |
2128 | 0 | split_empty_delimiter(str_ref, column_string_chars, column_string_offsets, |
2129 | 0 | string_pos, dest_pos); |
2130 | 0 | } else { |
2131 | 0 | if constexpr (!delimiter_const) { |
2132 | 0 | search.set_pattern(&delimiter_ref); |
2133 | 0 | } |
2134 | 0 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { |
2135 | 0 | const size_t str_offset = str_pos; |
2136 | 0 | const size_t old_size = column_string_chars.size(); |
2137 | | // search first match delimter_ref index from src string among str_offset to end |
2138 | 0 | const char* result_start = |
2139 | 0 | search.search(str_ref.data + str_offset, str_ref.size - str_offset); |
2140 | | // compute split part size |
2141 | 0 | const size_t split_part_size = result_start - str_ref.data - str_offset; |
2142 | | // save dist string split part |
2143 | 0 | if (split_part_size > 0) { |
2144 | 0 | const size_t new_size = old_size + split_part_size; |
2145 | 0 | column_string_chars.resize(new_size); |
2146 | 0 | memcpy_small_allow_read_write_overflow15( |
2147 | 0 | column_string_chars.data() + old_size, str_ref.data + str_offset, |
2148 | 0 | split_part_size); |
2149 | | // add dist string offset |
2150 | 0 | string_pos += split_part_size; |
2151 | 0 | } |
2152 | 0 | column_string_offsets.push_back(string_pos); |
2153 | | // array offset + 1 |
2154 | 0 | dest_pos++; |
2155 | | // add src string str_pos to next search start |
2156 | 0 | str_pos += split_part_size + delimiter_ref.size; |
2157 | 0 | } |
2158 | 0 | } |
2159 | 0 | dest_offsets.push_back(dest_pos); |
2160 | 0 | } |
2161 | 0 | } Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
2162 | | |
2163 | | void split_empty_delimiter(const StringRef& str_ref, ColumnString::Chars& column_string_chars, |
2164 | | ColumnString::Offsets& column_string_offsets, |
2165 | | ColumnArray::Offset64& string_pos, |
2166 | 0 | ColumnArray::Offset64& dest_pos) const { |
2167 | 0 | const size_t old_size = column_string_chars.size(); |
2168 | 0 | const size_t new_size = old_size + str_ref.size; |
2169 | 0 | column_string_chars.resize(new_size); |
2170 | 0 | memcpy(column_string_chars.data() + old_size, str_ref.data, str_ref.size); |
2171 | 0 | if (simd::VStringFunctions::is_ascii(str_ref)) { |
2172 | 0 | const auto size = str_ref.size; |
2173 | |
|
2174 | 0 | const auto nested_old_size = column_string_offsets.size(); |
2175 | 0 | const auto nested_new_size = nested_old_size + size; |
2176 | 0 | column_string_offsets.resize(nested_new_size); |
2177 | 0 | std::iota(column_string_offsets.data() + nested_old_size, |
2178 | 0 | column_string_offsets.data() + nested_new_size, string_pos + 1); |
2179 | |
|
2180 | 0 | string_pos += size; |
2181 | 0 | dest_pos += size; |
2182 | | // The above code is equivalent to the code in the following comment. |
2183 | | // for (size_t i = 0; i < str_ref.size; i++) { |
2184 | | // string_pos++; |
2185 | | // column_string_offsets.push_back(string_pos); |
2186 | | // (*dest_nested_null_map).push_back(false); |
2187 | | // dest_pos++; |
2188 | | // } |
2189 | 0 | } else { |
2190 | 0 | for (size_t i = 0, utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) { |
2191 | 0 | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]]; |
2192 | |
|
2193 | 0 | string_pos += utf8_char_len; |
2194 | 0 | column_string_offsets.push_back(string_pos); |
2195 | 0 | dest_pos++; |
2196 | 0 | } |
2197 | 0 | } |
2198 | 0 | } |
2199 | | }; |
2200 | | |
// Tag that selects a variant of FunctionCountSubString by its number of arguments.
enum class FunctionCountSubStringType {
    TWO_ARGUMENTS,
    THREE_ARGUMENTS,
};
2202 | | |
2203 | | template <FunctionCountSubStringType type> |
2204 | | class FunctionCountSubString : public IFunction { |
2205 | | public: |
2206 | | static constexpr auto name = "count_substrings"; |
2207 | | static constexpr auto arg_count = (type == FunctionCountSubStringType::TWO_ARGUMENTS) ? 2 : 3; |
2208 | | |
2209 | 235 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE6createEv Line | Count | Source | 2209 | 51 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); } |
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE6createEv Line | Count | Source | 2209 | 184 | static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); } |
|
2210 | | using NullMapType = PaddedPODArray<UInt8>; |
2211 | | |
2212 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8get_nameB5cxx11Ev |
2213 | | |
2214 | 0 | size_t get_number_of_arguments() const override { return arg_count; }Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE23get_number_of_argumentsEv |
2215 | | |
2216 | 219 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2217 | 219 | return std::make_shared<DataTypeInt32>(); |
2218 | 219 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 2216 | 43 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 2217 | 43 | return std::make_shared<DataTypeInt32>(); | 2218 | 43 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 2216 | 176 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 2217 | 176 | return std::make_shared<DataTypeInt32>(); | 2218 | 176 | } |
|
2219 | | |
2220 | 14 | DataTypes get_variadic_argument_types_impl() const override { |
2221 | 14 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { |
2222 | 7 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; |
2223 | 7 | } else { |
2224 | 7 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
2225 | 7 | std::make_shared<DataTypeInt32>()}; |
2226 | 7 | } |
2227 | 14 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE32get_variadic_argument_types_implEv Line | Count | Source | 2220 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 2221 | 7 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 2222 | 7 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 2223 | | } else { | 2224 | | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 2225 | | std::make_shared<DataTypeInt32>()}; | 2226 | | } | 2227 | 7 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE32get_variadic_argument_types_implEv Line | Count | Source | 2220 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 2221 | | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 2222 | | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()}; | 2223 | 7 | } else { | 2224 | 7 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 2225 | 7 | std::make_shared<DataTypeInt32>()}; | 2226 | 7 | } | 2227 | 7 | } |
|
2228 | | |
2229 | 221 | bool is_variadic() const override { return true; }_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE11is_variadicEv Line | Count | Source | 2229 | 44 | bool is_variadic() const override { return true; } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE11is_variadicEv Line | Count | Source | 2229 | 177 | bool is_variadic() const override { return true; } |
|
2230 | | |
2231 | | Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments, |
2232 | 189 | uint32_t result, size_t input_rows_count) const override { |
2233 | 189 | DCHECK(arg_count); |
2234 | 189 | bool col_const[arg_count]; |
2235 | 189 | ColumnPtr argument_columns[arg_count]; |
2236 | 722 | for (int i = 0; i < arg_count; ++i) { |
2237 | 533 | std::tie(argument_columns[i], col_const[i]) = |
2238 | 533 | unpack_if_const(block.get_by_position(arguments[i]).column); |
2239 | 533 | } |
2240 | | |
2241 | 189 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); |
2242 | 189 | auto& dest_column_data = dest_column_ptr->get_data(); |
2243 | | |
2244 | 189 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { |
2245 | 34 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); |
2246 | 34 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); |
2247 | 34 | std::visit( |
2248 | 34 | [&](auto str_const, auto pattern_const) { |
2249 | 34 | _execute<str_const, pattern_const>(src_column_string, pattern_column, |
2250 | 34 | dest_column_data, input_rows_count); |
2251 | 34 | }, _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESH_EEDaSC_SD_ Line | Count | Source | 2248 | 12 | [&](auto str_const, auto pattern_const) { | 2249 | 12 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 2250 | 12 | dest_column_data, input_rows_count); | 2251 | 12 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESG_IbLb1EEEEDaSC_SD_ Line | Count | Source | 2248 | 11 | [&](auto str_const, auto pattern_const) { | 2249 | 11 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 2250 | 11 | dest_column_data, input_rows_count); | 2251 | 11 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESG_IbLb0EEEEDaSC_SD_ Line | Count | Source | 2248 | 11 | [&](auto str_const, auto pattern_const) { | 2249 | 11 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 2250 | 11 | dest_column_data, input_rows_count); | 2251 | 11 | }, |
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESH_EEDaSC_SD_ |
2252 | 34 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); |
2253 | 155 | } else { |
2254 | 155 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); |
2255 | 155 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); |
2256 | 155 | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); |
2257 | 155 | std::visit( |
2258 | 155 | [&](auto str_const, auto pattern_const, auto start_pos_const) { |
2259 | 155 | _execute<str_const, pattern_const, start_pos_const>( |
2260 | 155 | src_column_string, pattern_column, start_pos_column, |
2261 | 155 | dest_column_data, input_rows_count); |
2262 | 155 | }, _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 2258 | 23 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 2259 | 23 | _execute<str_const, pattern_const, start_pos_const>( | 2260 | 23 | src_column_string, pattern_column, start_pos_column, | 2261 | 23 | dest_column_data, input_rows_count); | 2262 | 23 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 2258 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 2259 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 2260 | 22 | src_column_string, pattern_column, start_pos_column, | 2261 | 22 | dest_column_data, input_rows_count); | 2262 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 2258 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 2259 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 2260 | 22 | src_column_string, pattern_column, start_pos_column, | 2261 | 22 | dest_column_data, input_rows_count); | 2262 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 2258 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 2259 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 2260 | 22 | src_column_string, pattern_column, start_pos_column, | 2261 | 22 | dest_column_data, input_rows_count); | 2262 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 2258 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 2259 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 2260 | 22 | src_column_string, pattern_column, start_pos_column, | 2261 | 22 | dest_column_data, input_rows_count); | 2262 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 2258 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 2259 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 2260 | 22 | src_column_string, pattern_column, start_pos_column, | 2261 | 22 | dest_column_data, input_rows_count); | 2262 | 22 | }, |
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 2258 | 22 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 2259 | 22 | _execute<str_const, pattern_const, start_pos_const>( | 2260 | 22 | src_column_string, pattern_column, start_pos_column, | 2261 | 22 | dest_column_data, input_rows_count); | 2262 | 22 | }, |
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ |
2263 | 155 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
2264 | 155 | make_bool_variant(col_const[2])); |
2265 | 155 | } |
2266 | | |
2267 | 189 | block.replace_by_position(result, std::move(dest_column_ptr)); |
2268 | 189 | return Status::OK(); |
2269 | 189 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 2232 | 34 | uint32_t result, size_t input_rows_count) const override { | 2233 | 34 | DCHECK(arg_count); | 2234 | 34 | bool col_const[arg_count]; | 2235 | 34 | ColumnPtr argument_columns[arg_count]; | 2236 | 102 | for (int i = 0; i < arg_count; ++i) { | 2237 | 68 | std::tie(argument_columns[i], col_const[i]) = | 2238 | 68 | unpack_if_const(block.get_by_position(arguments[i]).column); | 2239 | 68 | } | 2240 | | | 2241 | 34 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); | 2242 | 34 | auto& dest_column_data = dest_column_ptr->get_data(); | 2243 | | | 2244 | 34 | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 2245 | 34 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 2246 | 34 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 2247 | 34 | std::visit( | 2248 | 34 | [&](auto str_const, auto pattern_const) { | 2249 | 34 | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 2250 | 34 | dest_column_data, input_rows_count); | 2251 | 34 | }, | 2252 | 34 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); | 2253 | | } else { | 2254 | | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 2255 | | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 2256 | | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); | 2257 | | std::visit( | 2258 | | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 2259 | | _execute<str_const, pattern_const, start_pos_const>( | 2260 | | src_column_string, pattern_column, start_pos_column, | 2261 | | dest_column_data, input_rows_count); | 2262 | | }, | 2263 | | make_bool_variant(col_const[0]), 
make_bool_variant(col_const[1]), | 2264 | | make_bool_variant(col_const[2])); | 2265 | | } | 2266 | | | 2267 | 34 | block.replace_by_position(result, std::move(dest_column_ptr)); | 2268 | 34 | return Status::OK(); | 2269 | 34 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 2232 | 155 | uint32_t result, size_t input_rows_count) const override { | 2233 | 155 | DCHECK(arg_count); | 2234 | 155 | bool col_const[arg_count]; | 2235 | 155 | ColumnPtr argument_columns[arg_count]; | 2236 | 620 | for (int i = 0; i < arg_count; ++i) { | 2237 | 465 | std::tie(argument_columns[i], col_const[i]) = | 2238 | 465 | unpack_if_const(block.get_by_position(arguments[i]).column); | 2239 | 465 | } | 2240 | | | 2241 | 155 | auto dest_column_ptr = ColumnInt32::create(input_rows_count); | 2242 | 155 | auto& dest_column_data = dest_column_ptr->get_data(); | 2243 | | | 2244 | | if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) { | 2245 | | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 2246 | | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 2247 | | std::visit( | 2248 | | [&](auto str_const, auto pattern_const) { | 2249 | | _execute<str_const, pattern_const>(src_column_string, pattern_column, | 2250 | | dest_column_data, input_rows_count); | 2251 | | }, | 2252 | | make_bool_variant(col_const[0]), make_bool_variant(col_const[1])); | 2253 | 155 | } else { | 2254 | 155 | const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]); | 2255 | 155 | const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]); | 2256 | 155 | const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]); | 2257 | 155 | std::visit( | 2258 | 155 | [&](auto str_const, auto pattern_const, auto start_pos_const) { | 2259 | 155 | _execute<str_const, pattern_const, start_pos_const>( | 2260 | 155 | src_column_string, pattern_column, start_pos_column, | 2261 | 155 | dest_column_data, input_rows_count); | 2262 | 155 | }, | 2263 | 155 | make_bool_variant(col_const[0]), 
make_bool_variant(col_const[1]), | 2264 | 155 | make_bool_variant(col_const[2])); | 2265 | 155 | } | 2266 | | | 2267 | 155 | block.replace_by_position(result, std::move(dest_column_ptr)); | 2268 | 155 | return Status::OK(); | 2269 | 155 | } |
|
2270 | | |
2271 | | private: |
2272 | | template <bool src_const, bool pattern_const> |
2273 | | void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column, |
2274 | 34 | ColumnInt32::Container& dest_column_data, size_t size) const { |
2275 | 81 | for (size_t i = 0; i < size; i++) { |
2276 | 47 | const StringRef str_ref = |
2277 | 47 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
2278 | | |
2279 | 47 | const StringRef pattern_ref = |
2280 | 47 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); |
2281 | 47 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); |
2282 | 47 | } |
2283 | 34 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 2274 | 12 | ColumnInt32::Container& dest_column_data, size_t size) const { | 2275 | 37 | for (size_t i = 0; i < size; i++) { | 2276 | 25 | const StringRef str_ref = | 2277 | 25 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 2278 | | | 2279 | 25 | const StringRef pattern_ref = | 2280 | 25 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 2281 | 25 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 2282 | 25 | } | 2283 | 12 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 2274 | 11 | ColumnInt32::Container& dest_column_data, size_t size) const { | 2275 | 22 | for (size_t i = 0; i < size; i++) { | 2276 | 11 | const StringRef str_ref = | 2277 | 11 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 2278 | | | 2279 | 11 | const StringRef pattern_ref = | 2280 | 11 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 2281 | 11 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 2282 | 11 | } | 2283 | 11 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 2274 | 11 | ColumnInt32::Container& dest_column_data, size_t size) const { | 2275 | 22 | for (size_t i = 0; i < size; i++) { | 2276 | 11 | const StringRef str_ref = | 2277 | 11 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 2278 | | | 2279 | 11 | const StringRef pattern_ref = | 2280 | 11 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 2281 | 11 | dest_column_data[i] = find_str_count(str_ref, pattern_ref); | 2282 | 11 | } | 2283 | 11 | } |
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
2284 | | |
2285 | | template <bool src_const, bool pattern_const, bool start_pos_const> |
2286 | | void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column, |
2287 | | const ColumnInt32& start_pos_column, ColumnInt32::Container& dest_column_data, |
2288 | 155 | size_t size) const { |
2289 | 334 | for (size_t i = 0; i < size; i++) { |
2290 | 179 | const StringRef str_ref = |
2291 | 179 | src_column_string.get_data_at(index_check_const<src_const>(i)); |
2292 | 179 | const StringRef pattern_ref = |
2293 | 179 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); |
2294 | | // 1-based index |
2295 | 179 | int32_t start_pos = |
2296 | 179 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; |
2297 | | |
2298 | 179 | const char* p = str_ref.begin(); |
2299 | 179 | const char* end = str_ref.end(); |
2300 | 179 | int char_size = 0; |
2301 | 1.22k | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { |
2302 | 1.04k | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; |
2303 | 1.04k | } |
2304 | 179 | const auto start_byte_len = p - str_ref.begin(); |
2305 | | |
2306 | 179 | if (start_pos < 0 || start_byte_len >= str_ref.size) { |
2307 | 115 | dest_column_data[i] = 0; |
2308 | 115 | } else { |
2309 | 64 | dest_column_data[i] = |
2310 | 64 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); |
2311 | 64 | } |
2312 | 179 | } |
2313 | 155 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 2288 | 23 | size_t size) const { | 2289 | 70 | for (size_t i = 0; i < size; i++) { | 2290 | 47 | const StringRef str_ref = | 2291 | 47 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 2292 | 47 | const StringRef pattern_ref = | 2293 | 47 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 2294 | | // 1-based index | 2295 | 47 | int32_t start_pos = | 2296 | 47 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 2297 | | | 2298 | 47 | const char* p = str_ref.begin(); | 2299 | 47 | const char* end = str_ref.end(); | 2300 | 47 | int char_size = 0; | 2301 | 316 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 2302 | 269 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 2303 | 269 | } | 2304 | 47 | const auto start_byte_len = p - str_ref.begin(); | 2305 | | | 2306 | 47 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 2307 | 31 | dest_column_data[i] = 0; | 2308 | 31 | } else { | 2309 | 16 | dest_column_data[i] = | 2310 | 16 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 2311 | 16 | } | 2312 | 47 | } | 2313 | 23 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 2288 | 22 | size_t size) const { | 2289 | 44 | for (size_t i = 0; i < size; i++) { | 2290 | 22 | const StringRef str_ref = | 2291 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 2292 | 22 | const StringRef pattern_ref = | 2293 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 2294 | | // 1-based index | 2295 | 22 | int32_t start_pos = | 2296 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 2297 | | | 2298 | 22 | const char* p = str_ref.begin(); | 2299 | 22 | const char* end = str_ref.end(); | 2300 | 22 | int char_size = 0; | 2301 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 2302 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 2303 | 129 | } | 2304 | 22 | const auto start_byte_len = p - str_ref.begin(); | 2305 | | | 2306 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 2307 | 14 | dest_column_data[i] = 0; | 2308 | 14 | } else { | 2309 | 8 | dest_column_data[i] = | 2310 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 2311 | 8 | } | 2312 | 22 | } | 2313 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 2288 | 22 | size_t size) const { | 2289 | 44 | for (size_t i = 0; i < size; i++) { | 2290 | 22 | const StringRef str_ref = | 2291 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 2292 | 22 | const StringRef pattern_ref = | 2293 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 2294 | | // 1-based index | 2295 | 22 | int32_t start_pos = | 2296 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 2297 | | | 2298 | 22 | const char* p = str_ref.begin(); | 2299 | 22 | const char* end = str_ref.end(); | 2300 | 22 | int char_size = 0; | 2301 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 2302 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 2303 | 129 | } | 2304 | 22 | const auto start_byte_len = p - str_ref.begin(); | 2305 | | | 2306 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 2307 | 14 | dest_column_data[i] = 0; | 2308 | 14 | } else { | 2309 | 8 | dest_column_data[i] = | 2310 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 2311 | 8 | } | 2312 | 22 | } | 2313 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 2288 | 22 | size_t size) const { | 2289 | 44 | for (size_t i = 0; i < size; i++) { | 2290 | 22 | const StringRef str_ref = | 2291 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 2292 | 22 | const StringRef pattern_ref = | 2293 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 2294 | | // 1-based index | 2295 | 22 | int32_t start_pos = | 2296 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 2297 | | | 2298 | 22 | const char* p = str_ref.begin(); | 2299 | 22 | const char* end = str_ref.end(); | 2300 | 22 | int char_size = 0; | 2301 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 2302 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 2303 | 129 | } | 2304 | 22 | const auto start_byte_len = p - str_ref.begin(); | 2305 | | | 2306 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 2307 | 14 | dest_column_data[i] = 0; | 2308 | 14 | } else { | 2309 | 8 | dest_column_data[i] = | 2310 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 2311 | 8 | } | 2312 | 22 | } | 2313 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 2288 | 22 | size_t size) const { | 2289 | 44 | for (size_t i = 0; i < size; i++) { | 2290 | 22 | const StringRef str_ref = | 2291 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 2292 | 22 | const StringRef pattern_ref = | 2293 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 2294 | | // 1-based index | 2295 | 22 | int32_t start_pos = | 2296 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 2297 | | | 2298 | 22 | const char* p = str_ref.begin(); | 2299 | 22 | const char* end = str_ref.end(); | 2300 | 22 | int char_size = 0; | 2301 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 2302 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 2303 | 129 | } | 2304 | 22 | const auto start_byte_len = p - str_ref.begin(); | 2305 | | | 2306 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 2307 | 14 | dest_column_data[i] = 0; | 2308 | 14 | } else { | 2309 | 8 | dest_column_data[i] = | 2310 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 2311 | 8 | } | 2312 | 22 | } | 2313 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 2288 | 22 | size_t size) const { | 2289 | 44 | for (size_t i = 0; i < size; i++) { | 2290 | 22 | const StringRef str_ref = | 2291 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 2292 | 22 | const StringRef pattern_ref = | 2293 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 2294 | | // 1-based index | 2295 | 22 | int32_t start_pos = | 2296 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 2297 | | | 2298 | 22 | const char* p = str_ref.begin(); | 2299 | 22 | const char* end = str_ref.end(); | 2300 | 22 | int char_size = 0; | 2301 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 2302 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 2303 | 129 | } | 2304 | 22 | const auto start_byte_len = p - str_ref.begin(); | 2305 | | | 2306 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 2307 | 14 | dest_column_data[i] = 0; | 2308 | 14 | } else { | 2309 | 8 | dest_column_data[i] = | 2310 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 2311 | 8 | } | 2312 | 22 | } | 2313 | 22 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm Line | Count | Source | 2288 | 22 | size_t size) const { | 2289 | 44 | for (size_t i = 0; i < size; i++) { | 2290 | 22 | const StringRef str_ref = | 2291 | 22 | src_column_string.get_data_at(index_check_const<src_const>(i)); | 2292 | 22 | const StringRef pattern_ref = | 2293 | 22 | pattern_column.get_data_at(index_check_const<pattern_const>(i)); | 2294 | | // 1-based index | 2295 | 22 | int32_t start_pos = | 2296 | 22 | start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1; | 2297 | | | 2298 | 22 | const char* p = str_ref.begin(); | 2299 | 22 | const char* end = str_ref.end(); | 2300 | 22 | int char_size = 0; | 2301 | 151 | for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) { | 2302 | 129 | char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)]; | 2303 | 129 | } | 2304 | 22 | const auto start_byte_len = p - str_ref.begin(); | 2305 | | | 2306 | 22 | if (start_pos < 0 || start_byte_len >= str_ref.size) { | 2307 | 14 | dest_column_data[i] = 0; | 2308 | 14 | } else { | 2309 | 8 | dest_column_data[i] = | 2310 | 8 | find_str_count(str_ref.substring(start_byte_len), pattern_ref); | 2311 | 8 | } | 2312 | 22 | } | 2313 | 22 | } |
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm |
2314 | | |
2315 | 208 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { |
2316 | 208 | size_t old_size = pos; |
2317 | 208 | size_t str_size = str_ref.size; |
2318 | 1.15k | while (pos < str_size && |
2319 | 1.15k | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, |
2320 | 1.06k | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { |
2321 | 948 | pos++; |
2322 | 948 | } |
2323 | 208 | return pos - old_size; |
2324 | 208 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8find_posEmNS_9StringRefES3_ Line | Count | Source | 2315 | 56 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { | 2316 | 56 | size_t old_size = pos; | 2317 | 56 | size_t str_size = str_ref.size; | 2318 | 372 | while (pos < str_size && | 2319 | 372 | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, | 2320 | 344 | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { | 2321 | 316 | pos++; | 2322 | 316 | } | 2323 | 56 | return pos - old_size; | 2324 | 56 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8find_posEmNS_9StringRefES3_ Line | Count | Source | 2315 | 152 | size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const { | 2316 | 152 | size_t old_size = pos; | 2317 | 152 | size_t str_size = str_ref.size; | 2318 | 784 | while (pos < str_size && | 2319 | 784 | memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos, | 2320 | 720 | (const uint8_t*)pattern_ref.data, pattern_ref.size)) { | 2321 | 632 | pos++; | 2322 | 632 | } | 2323 | 152 | return pos - old_size; | 2324 | 152 | } |
|
2325 | | |
2326 | 111 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { |
2327 | 111 | int count = 0; |
2328 | 111 | if (str_ref.size == 0 || pattern_ref.size == 0) { |
2329 | 19 | return 0; |
2330 | 92 | } else { |
2331 | 208 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { |
2332 | 208 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); |
2333 | 208 | if (res_pos == (str_ref.size - str_pos)) { |
2334 | 92 | break; // not find |
2335 | 92 | } |
2336 | 116 | count++; |
2337 | 116 | str_pos = str_pos + res_pos + pattern_ref.size; |
2338 | 116 | } |
2339 | 92 | } |
2340 | 92 | return count; |
2341 | 111 | } _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE14find_str_countENS_9StringRefES3_ Line | Count | Source | 2326 | 47 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { | 2327 | 47 | int count = 0; | 2328 | 47 | if (str_ref.size == 0 || pattern_ref.size == 0) { | 2329 | 19 | return 0; | 2330 | 28 | } else { | 2331 | 56 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { | 2332 | 56 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); | 2333 | 56 | if (res_pos == (str_ref.size - str_pos)) { | 2334 | 28 | break; // not find | 2335 | 28 | } | 2336 | 28 | count++; | 2337 | 28 | str_pos = str_pos + res_pos + pattern_ref.size; | 2338 | 28 | } | 2339 | 28 | } | 2340 | 28 | return count; | 2341 | 47 | } |
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE14find_str_countENS_9StringRefES3_ Line | Count | Source | 2326 | 64 | int find_str_count(const StringRef str_ref, StringRef pattern_ref) const { | 2327 | 64 | int count = 0; | 2328 | 64 | if (str_ref.size == 0 || pattern_ref.size == 0) { | 2329 | 0 | return 0; | 2330 | 64 | } else { | 2331 | 152 | for (size_t str_pos = 0; str_pos <= str_ref.size;) { | 2332 | 152 | const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref); | 2333 | 152 | if (res_pos == (str_ref.size - str_pos)) { | 2334 | 64 | break; // not find | 2335 | 64 | } | 2336 | 88 | count++; | 2337 | 88 | str_pos = str_pos + res_pos + pattern_ref.size; | 2338 | 88 | } | 2339 | 64 | } | 2340 | 64 | return count; | 2341 | 64 | } |
|
2342 | | }; |
2343 | | |
2344 | | struct SM3Sum { |
2345 | | static constexpr auto name = "sm3sum"; |
2346 | | using ObjectData = SM3Digest; |
2347 | | }; |
2348 | | |
2349 | | struct MD5Sum { |
2350 | | static constexpr auto name = "md5sum"; |
2351 | | using ObjectData = Md5Digest; |
2352 | | }; |
2353 | | |
2354 | | template <typename Impl> |
2355 | | class FunctionStringDigestMulti : public IFunction { |
2356 | | public: |
2357 | | static constexpr auto name = Impl::name; |
2358 | 220 | static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }_ZN5doris25FunctionStringDigestMultiINS_6SM3SumEE6createEv Line | Count | Source | 2358 | 110 | static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); } |
_ZN5doris25FunctionStringDigestMultiINS_6MD5SumEE6createEv Line | Count | Source | 2358 | 110 | static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); } |
|
2359 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE8get_nameB5cxx11Ev |
2360 | 0 | size_t get_number_of_arguments() const override { return 0; }Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE23get_number_of_argumentsEv |
2361 | 206 | bool is_variadic() const override { return true; }_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE11is_variadicEv Line | Count | Source | 2361 | 103 | bool is_variadic() const override { return true; } |
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE11is_variadicEv Line | Count | Source | 2361 | 103 | bool is_variadic() const override { return true; } |
|
2362 | | |
2363 | 204 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2364 | 204 | return std::make_shared<DataTypeString>(); |
2365 | 204 | } _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 2363 | 102 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 2364 | 102 | return std::make_shared<DataTypeString>(); | 2365 | 102 | } |
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 2363 | 102 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 2364 | 102 | return std::make_shared<DataTypeString>(); | 2365 | 102 | } |
|
2366 | | |
2367 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2368 | 160 | uint32_t result, size_t input_rows_count) const override { |
2369 | 160 | DCHECK_GE(arguments.size(), 1); |
2370 | | |
2371 | 160 | auto res = ColumnString::create(); |
2372 | 160 | auto& res_data = res->get_chars(); |
2373 | 160 | auto& res_offset = res->get_offsets(); |
2374 | 160 | res_offset.resize(input_rows_count); |
2375 | | |
2376 | 160 | std::vector<ColumnPtr> argument_columns(arguments.size()); |
2377 | 160 | std::vector<uint8_t> is_const(arguments.size(), 0); |
2378 | 536 | for (size_t i = 0; i < arguments.size(); ++i) { |
2379 | 376 | std::tie(argument_columns[i], is_const[i]) = |
2380 | 376 | unpack_if_const(block.get_by_position(arguments[i]).column); |
2381 | 376 | } |
2382 | | |
2383 | 160 | if (check_and_get_column<ColumnString>(argument_columns[0].get())) { |
2384 | 80 | vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const, |
2385 | 80 | res_data, res_offset); |
2386 | 80 | } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) { |
2387 | 80 | vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const, |
2388 | 80 | res_data, res_offset); |
2389 | 80 | } else { |
2390 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", |
2391 | 0 | argument_columns[0]->get_name(), get_name()); |
2392 | 0 | } |
2393 | | |
2394 | 160 | block.replace_by_position(result, std::move(res)); |
2395 | 160 | return Status::OK(); |
2396 | 160 | } _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 2368 | 80 | uint32_t result, size_t input_rows_count) const override { | 2369 | 80 | DCHECK_GE(arguments.size(), 1); | 2370 | | | 2371 | 80 | auto res = ColumnString::create(); | 2372 | 80 | auto& res_data = res->get_chars(); | 2373 | 80 | auto& res_offset = res->get_offsets(); | 2374 | 80 | res_offset.resize(input_rows_count); | 2375 | | | 2376 | 80 | std::vector<ColumnPtr> argument_columns(arguments.size()); | 2377 | 80 | std::vector<uint8_t> is_const(arguments.size(), 0); | 2378 | 268 | for (size_t i = 0; i < arguments.size(); ++i) { | 2379 | 188 | std::tie(argument_columns[i], is_const[i]) = | 2380 | 188 | unpack_if_const(block.get_by_position(arguments[i]).column); | 2381 | 188 | } | 2382 | | | 2383 | 80 | if (check_and_get_column<ColumnString>(argument_columns[0].get())) { | 2384 | 40 | vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const, | 2385 | 40 | res_data, res_offset); | 2386 | 40 | } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) { | 2387 | 40 | vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const, | 2388 | 40 | res_data, res_offset); | 2389 | 40 | } else { | 2390 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 2391 | 0 | argument_columns[0]->get_name(), get_name()); | 2392 | 0 | } | 2393 | | | 2394 | 80 | block.replace_by_position(result, std::move(res)); | 2395 | 80 | return Status::OK(); | 2396 | 80 | } |
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 2368 | 80 | uint32_t result, size_t input_rows_count) const override { | 2369 | 80 | DCHECK_GE(arguments.size(), 1); | 2370 | | | 2371 | 80 | auto res = ColumnString::create(); | 2372 | 80 | auto& res_data = res->get_chars(); | 2373 | 80 | auto& res_offset = res->get_offsets(); | 2374 | 80 | res_offset.resize(input_rows_count); | 2375 | | | 2376 | 80 | std::vector<ColumnPtr> argument_columns(arguments.size()); | 2377 | 80 | std::vector<uint8_t> is_const(arguments.size(), 0); | 2378 | 268 | for (size_t i = 0; i < arguments.size(); ++i) { | 2379 | 188 | std::tie(argument_columns[i], is_const[i]) = | 2380 | 188 | unpack_if_const(block.get_by_position(arguments[i]).column); | 2381 | 188 | } | 2382 | | | 2383 | 80 | if (check_and_get_column<ColumnString>(argument_columns[0].get())) { | 2384 | 40 | vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const, | 2385 | 40 | res_data, res_offset); | 2386 | 40 | } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) { | 2387 | 40 | vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const, | 2388 | 40 | res_data, res_offset); | 2389 | 40 | } else { | 2390 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", | 2391 | 0 | argument_columns[0]->get_name(), get_name()); | 2392 | 0 | } | 2393 | | | 2394 | 80 | block.replace_by_position(result, std::move(res)); | 2395 | 80 | return Status::OK(); | 2396 | 80 | } |
|
2397 | | |
2398 | | private: |
2399 | | template <typename ColumnType> |
2400 | | void vector_execute(Block& block, size_t input_rows_count, |
2401 | | const std::vector<ColumnPtr>& argument_columns, |
2402 | | const std::vector<uint8_t>& is_const, ColumnString::Chars& res_data, |
2403 | 160 | ColumnString::Offsets& res_offset) const { |
2404 | 160 | using ObjectData = typename Impl::ObjectData; |
2405 | 372 | for (size_t i = 0; i < input_rows_count; ++i) { |
2406 | 212 | ObjectData digest; |
2407 | 676 | for (size_t j = 0; j < argument_columns.size(); ++j) { |
2408 | 464 | const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get()); |
2409 | 464 | StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i); |
2410 | 464 | if (data_ref.size < 1) { |
2411 | 120 | continue; |
2412 | 120 | } |
2413 | 344 | digest.update(data_ref.data, data_ref.size); |
2414 | 344 | } |
2415 | 212 | digest.digest(); |
2416 | 212 | StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()), |
2417 | 212 | i, res_data, res_offset); |
2418 | 212 | } |
2419 | 160 | } _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE Line | Count | Source | 2403 | 40 | ColumnString::Offsets& res_offset) const { | 2404 | 40 | using ObjectData = typename Impl::ObjectData; | 2405 | 93 | for (size_t i = 0; i < input_rows_count; ++i) { | 2406 | 53 | ObjectData digest; | 2407 | 169 | for (size_t j = 0; j < argument_columns.size(); ++j) { | 2408 | 116 | const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get()); | 2409 | 116 | StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i); | 2410 | 116 | if (data_ref.size < 1) { | 2411 | 30 | continue; | 2412 | 30 | } | 2413 | 86 | digest.update(data_ref.data, data_ref.size); | 2414 | 86 | } | 2415 | 53 | digest.digest(); | 2416 | 53 | StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()), | 2417 | 53 | i, res_data, res_offset); | 2418 | 53 | } | 2419 | 40 | } |
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE Line | Count | Source | 2403 | 40 | ColumnString::Offsets& res_offset) const { | 2404 | 40 | using ObjectData = typename Impl::ObjectData; | 2405 | 93 | for (size_t i = 0; i < input_rows_count; ++i) { | 2406 | 53 | ObjectData digest; | 2407 | 169 | for (size_t j = 0; j < argument_columns.size(); ++j) { | 2408 | 116 | const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get()); | 2409 | 116 | StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i); | 2410 | 116 | if (data_ref.size < 1) { | 2411 | 30 | continue; | 2412 | 30 | } | 2413 | 86 | digest.update(data_ref.data, data_ref.size); | 2414 | 86 | } | 2415 | 53 | digest.digest(); | 2416 | 53 | StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()), | 2417 | 53 | i, res_data, res_offset); | 2418 | 53 | } | 2419 | 40 | } |
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE Line | Count | Source | 2403 | 40 | ColumnString::Offsets& res_offset) const { | 2404 | 40 | using ObjectData = typename Impl::ObjectData; | 2405 | 93 | for (size_t i = 0; i < input_rows_count; ++i) { | 2406 | 53 | ObjectData digest; | 2407 | 169 | for (size_t j = 0; j < argument_columns.size(); ++j) { | 2408 | 116 | const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get()); | 2409 | 116 | StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i); | 2410 | 116 | if (data_ref.size < 1) { | 2411 | 30 | continue; | 2412 | 30 | } | 2413 | 86 | digest.update(data_ref.data, data_ref.size); | 2414 | 86 | } | 2415 | 53 | digest.digest(); | 2416 | 53 | StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()), | 2417 | 53 | i, res_data, res_offset); | 2418 | 53 | } | 2419 | 40 | } |
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE Line | Count | Source | 2403 | 40 | ColumnString::Offsets& res_offset) const { | 2404 | 40 | using ObjectData = typename Impl::ObjectData; | 2405 | 93 | for (size_t i = 0; i < input_rows_count; ++i) { | 2406 | 53 | ObjectData digest; | 2407 | 169 | for (size_t j = 0; j < argument_columns.size(); ++j) { | 2408 | 116 | const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get()); | 2409 | 116 | StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i); | 2410 | 116 | if (data_ref.size < 1) { | 2411 | 30 | continue; | 2412 | 30 | } | 2413 | 86 | digest.update(data_ref.data, data_ref.size); | 2414 | 86 | } | 2415 | 53 | digest.digest(); | 2416 | 53 | StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()), | 2417 | 53 | i, res_data, res_offset); | 2418 | 53 | } | 2419 | 40 | } |
|
2420 | | }; |
2421 | | |
2422 | | class FunctionStringDigestSHA1 : public IFunction { |
2423 | | public: |
2424 | | static constexpr auto name = "sha1"; |
2425 | 20 | static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA1>(); } |
2426 | 0 | String get_name() const override { return name; } |
2427 | 0 | size_t get_number_of_arguments() const override { return 1; } |
2428 | 13 | bool is_variadic() const override { return true; } |
2429 | | |
2430 | 12 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2431 | 12 | return std::make_shared<DataTypeString>(); |
2432 | 12 | } |
2433 | | |
2434 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2435 | 10 | uint32_t result, size_t input_rows_count) const override { |
2436 | 10 | DCHECK_EQ(arguments.size(), 1); |
2437 | 10 | ColumnPtr data_col = block.get_by_position(arguments[0]).column; |
2438 | | |
2439 | 10 | auto res_col = ColumnString::create(); |
2440 | 10 | auto& res_data = res_col->get_chars(); |
2441 | 10 | auto& res_offset = res_col->get_offsets(); |
2442 | 10 | res_offset.resize(input_rows_count); |
2443 | 10 | if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) { |
2444 | 5 | vector_execute(str_col, input_rows_count, res_data, res_offset); |
2445 | 5 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) { |
2446 | 5 | vector_execute(vb_col, input_rows_count, res_data, res_offset); |
2447 | 5 | } else { |
2448 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", |
2449 | 0 | data_col->get_name(), get_name()); |
2450 | 0 | } |
2451 | | |
2452 | 10 | block.replace_by_position(result, std::move(res_col)); |
2453 | 10 | return Status::OK(); |
2454 | 10 | } |
2455 | | |
2456 | | private: |
2457 | | template <typename ColumnType> |
2458 | | void vector_execute(const ColumnType* col, size_t input_rows_count, |
2459 | 10 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { |
2460 | 10 | SHA1Digest digest; |
2461 | 28 | for (size_t i = 0; i < input_rows_count; ++i) { |
2462 | 18 | StringRef data_ref = col->get_data_at(i); |
2463 | 18 | digest.reset(data_ref.data, data_ref.size); |
2464 | 18 | std::string_view ans = digest.digest(); |
2465 | | |
2466 | 18 | StringOP::push_value_string(ans, i, res_data, res_offset); |
2467 | 18 | } |
2468 | 10 | } _ZNK5doris24FunctionStringDigestSHA114vector_executeINS_9ColumnStrIjEEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE Line | Count | Source | 2459 | 5 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { | 2460 | 5 | SHA1Digest digest; | 2461 | 14 | for (size_t i = 0; i < input_rows_count; ++i) { | 2462 | 9 | StringRef data_ref = col->get_data_at(i); | 2463 | 9 | digest.reset(data_ref.data, data_ref.size); | 2464 | 9 | std::string_view ans = digest.digest(); | 2465 | | | 2466 | 9 | StringOP::push_value_string(ans, i, res_data, res_offset); | 2467 | 9 | } | 2468 | 5 | } |
_ZNK5doris24FunctionStringDigestSHA114vector_executeINS_15ColumnVarbinaryEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS6_IjLm4096ES9_Lm16ELm15EEE Line | Count | Source | 2459 | 5 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { | 2460 | 5 | SHA1Digest digest; | 2461 | 14 | for (size_t i = 0; i < input_rows_count; ++i) { | 2462 | 9 | StringRef data_ref = col->get_data_at(i); | 2463 | 9 | digest.reset(data_ref.data, data_ref.size); | 2464 | 9 | std::string_view ans = digest.digest(); | 2465 | | | 2466 | 9 | StringOP::push_value_string(ans, i, res_data, res_offset); | 2467 | 9 | } | 2468 | 5 | } |
|
2469 | | }; |
2470 | | |
2471 | | class FunctionStringDigestSHA2 : public IFunction { |
2472 | | public: |
2473 | | static constexpr auto name = "sha2"; |
2474 | 8 | static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA2>(); } |
2475 | 0 | String get_name() const override { return name; } |
2476 | 0 | size_t get_number_of_arguments() const override { return 2; } |
2477 | 1 | bool is_variadic() const override { return true; } |
2478 | | |
2479 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2480 | 0 | return std::make_shared<DataTypeString>(); |
2481 | 0 | } |
2482 | | |
2483 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2484 | 0 | uint32_t result, size_t input_rows_count) const override { |
2485 | 0 | DCHECK(!is_column_const(*block.get_by_position(arguments[0]).column)); |
2486 | |
|
2487 | 0 | ColumnPtr data_col = block.get_by_position(arguments[0]).column; |
2488 | |
|
2489 | 0 | [[maybe_unused]] const auto& [right_column, right_const] = |
2490 | 0 | unpack_if_const(block.get_by_position(arguments[1]).column); |
2491 | 0 | auto digest_length = assert_cast<const ColumnInt32*>(right_column.get())->get_data()[0]; |
2492 | |
|
2493 | 0 | auto res_col = ColumnString::create(); |
2494 | 0 | auto& res_data = res_col->get_chars(); |
2495 | 0 | auto& res_offset = res_col->get_offsets(); |
2496 | 0 | res_offset.resize(input_rows_count); |
2497 | |
|
2498 | 0 | if (digest_length == 224) { |
2499 | 0 | execute_base<SHA224Digest>(data_col, input_rows_count, res_data, res_offset); |
2500 | 0 | } else if (digest_length == 256) { |
2501 | 0 | execute_base<SHA256Digest>(data_col, input_rows_count, res_data, res_offset); |
2502 | 0 | } else if (digest_length == 384) { |
2503 | 0 | execute_base<SHA384Digest>(data_col, input_rows_count, res_data, res_offset); |
2504 | 0 | } else if (digest_length == 512) { |
2505 | 0 | execute_base<SHA512Digest>(data_col, input_rows_count, res_data, res_offset); |
2506 | 0 | } else { |
2507 | 0 | return Status::InvalidArgument( |
2508 | 0 | "sha2's digest length only support 224/256/384/512 but meet {}", digest_length); |
2509 | 0 | } |
2510 | | |
2511 | 0 | block.replace_by_position(result, std::move(res_col)); |
2512 | 0 | return Status::OK(); |
2513 | 0 | } |
2514 | | |
2515 | | private: |
2516 | | template <typename T> |
2517 | | void execute_base(ColumnPtr data_col, int input_rows_count, ColumnString::Chars& res_data, |
2518 | 0 | ColumnString::Offsets& res_offset) const { |
2519 | 0 | if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) { |
2520 | 0 | vector_execute<T>(str_col, input_rows_count, res_data, res_offset); |
2521 | 0 | } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) { |
2522 | 0 | vector_execute<T>(vb_col, input_rows_count, res_data, res_offset); |
2523 | 0 | } else { |
2524 | 0 | throw Exception(ErrorCode::RUNTIME_ERROR, |
2525 | 0 | "Illegal column {} of argument of function {}", data_col->get_name(), |
2526 | 0 | get_name()); |
2527 | 0 | } |
2528 | 0 | } Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA224DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA256DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA384DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA512DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE |
2529 | | |
2530 | | template <typename DigestType, typename ColumnType> |
2531 | | void vector_execute(const ColumnType* col, size_t input_rows_count, |
2532 | 0 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const { |
2533 | 0 | DigestType digest; |
2534 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
2535 | 0 | StringRef data_ref = col->get_data_at(i); |
2536 | 0 | digest.reset(data_ref.data, data_ref.size); |
2537 | 0 | std::string_view ans = digest.digest(); |
2538 | |
|
2539 | 0 | StringOP::push_value_string(ans, i, res_data, res_offset); |
2540 | 0 | } |
2541 | 0 | } Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE Unexecuted instantiation: 
_ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE |
2542 | | }; |
2543 | | |
2544 | | class FunctionExtractURLParameter : public IFunction { |
2545 | | public: |
2546 | | static constexpr auto name = "extract_url_parameter"; |
2547 | 45 | static FunctionPtr create() { return std::make_shared<FunctionExtractURLParameter>(); } |
2548 | 1 | String get_name() const override { return name; } |
2549 | 37 | size_t get_number_of_arguments() const override { return 2; } |
2550 | | |
2551 | 37 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2552 | 37 | return std::make_shared<DataTypeString>(); |
2553 | 37 | } |
2554 | | |
2555 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2556 | 37 | uint32_t result, size_t input_rows_count) const override { |
2557 | 37 | auto col_url = |
2558 | 37 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
2559 | 37 | auto col_parameter = |
2560 | 37 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
2561 | 37 | auto url_col = assert_cast<const ColumnString*>(col_url.get()); |
2562 | 37 | auto parameter_col = assert_cast<const ColumnString*>(col_parameter.get()); |
2563 | | |
2564 | 37 | ColumnString::MutablePtr col_res = ColumnString::create(); |
2565 | | |
2566 | 85 | for (int i = 0; i < input_rows_count; ++i) { |
2567 | 48 | auto source = url_col->get_data_at(i); |
2568 | 48 | auto param = parameter_col->get_data_at(i); |
2569 | 48 | auto res = extract_url(source, param); |
2570 | | |
2571 | 48 | col_res->insert_data(res.data, res.size); |
2572 | 48 | } |
2573 | | |
2574 | 37 | block.replace_by_position(result, std::move(col_res)); |
2575 | 37 | return Status::OK(); |
2576 | 37 | } |
2577 | | |
2578 | | private: |
2579 | 48 | StringRef extract_url(StringRef url, StringRef parameter) const { |
2580 | 48 | if (url.size == 0 || parameter.size == 0) { |
2581 | 8 | return StringRef("", 0); |
2582 | 8 | } |
2583 | 40 | return UrlParser::extract_url(url, parameter); |
2584 | 48 | } |
2585 | | }; |
2586 | | |
2587 | | class FunctionStringParseUrl : public IFunction { |
2588 | | public: |
2589 | | static constexpr auto name = "parse_url"; |
2590 | 101 | static FunctionPtr create() { return std::make_shared<FunctionStringParseUrl>(); } |
2591 | 0 | String get_name() const override { return name; } |
2592 | 0 | size_t get_number_of_arguments() const override { return 0; } |
2593 | 94 | bool is_variadic() const override { return true; } |
2594 | | |
2595 | 93 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2596 | 93 | return make_nullable(std::make_shared<DataTypeString>()); |
2597 | 93 | } |
2598 | | |
2599 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2600 | 93 | uint32_t result, size_t input_rows_count) const override { |
2601 | 93 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
2602 | 93 | auto& null_map_data = null_map->get_data(); |
2603 | 93 | DCHECK_GE(3, arguments.size()); |
2604 | 93 | auto res = ColumnString::create(); |
2605 | 93 | auto& res_offsets = res->get_offsets(); |
2606 | 93 | auto& res_chars = res->get_chars(); |
2607 | 93 | res_offsets.resize(input_rows_count); |
2608 | | |
2609 | 93 | size_t argument_size = arguments.size(); |
2610 | 93 | const bool has_key = argument_size == 3; |
2611 | | |
2612 | 93 | std::vector<ColumnPtr> argument_columns(argument_size); |
2613 | 93 | std::vector<UInt8> col_const(argument_size); |
2614 | 308 | for (size_t i = 0; i < argument_size; ++i) { |
2615 | 215 | std::tie(argument_columns[i], col_const[i]) = |
2616 | 215 | unpack_if_const(block.get_by_position(arguments[i]).column); |
2617 | 215 | } |
2618 | | |
2619 | 93 | const auto* url_col = assert_cast<const ColumnString*>(argument_columns[0].get()); |
2620 | 93 | const auto* part_col = assert_cast<const ColumnString*>(argument_columns[1].get()); |
2621 | 93 | const bool part_const = col_const[1]; |
2622 | 93 | std::vector<UrlParser::UrlPart> url_parts; |
2623 | 93 | const int part_nums = part_const ? 1 : input_rows_count; |
2624 | | |
2625 | 93 | url_parts.resize(part_nums); |
2626 | 209 | for (int i = 0; i < part_nums; i++) { |
2627 | 116 | StringRef part = part_col->get_data_at(i); |
2628 | 116 | UrlParser::UrlPart url_part = UrlParser::get_url_part(part); |
2629 | 116 | if (url_part == UrlParser::INVALID) { |
2630 | 0 | return Status::RuntimeError("Invalid URL part: {}\n{}", |
2631 | 0 | std::string(part.data, part.size), |
2632 | 0 | "(Valid URL parts are 'PROTOCOL', 'HOST', " |
2633 | 0 | "'PATH', 'REF', 'AUTHORITY', " |
2634 | 0 | "'FILE', 'USERINFO', 'PORT' and 'QUERY')"); |
2635 | 0 | } |
2636 | 116 | url_parts[i] = url_part; |
2637 | 116 | } |
2638 | | |
2639 | 93 | if (has_key) { |
2640 | 29 | const bool url_const = col_const[0]; |
2641 | 29 | const bool key_const = col_const[2]; |
2642 | 29 | const auto* key_col = assert_cast<const ColumnString*>(argument_columns[2].get()); |
2643 | 29 | RETURN_IF_ERROR(std::visit( |
2644 | 29 | [&](auto url_const, auto part_const, auto key_const) { |
2645 | 29 | return vector_parse_key<url_const, part_const, key_const>( |
2646 | 29 | url_col, url_parts, key_col, input_rows_count, null_map_data, |
2647 | 29 | res_chars, res_offsets); |
2648 | 29 | }, |
2649 | 29 | make_bool_variant(url_const), make_bool_variant(part_const), |
2650 | 29 | make_bool_variant(key_const))); |
2651 | 64 | } else { |
2652 | 64 | const bool url_const = col_const[0]; |
2653 | 64 | RETURN_IF_ERROR(std::visit( |
2654 | 64 | [&](auto url_const, auto part_const) { |
2655 | 64 | return vector_parse<url_const, part_const>(url_col, url_parts, |
2656 | 64 | input_rows_count, null_map_data, |
2657 | 64 | res_chars, res_offsets); |
2658 | 64 | }, |
2659 | 64 | make_bool_variant(url_const), make_bool_variant(part_const))); |
2660 | 64 | } |
2661 | 93 | block.get_by_position(result).column = |
2662 | 93 | ColumnNullable::create(std::move(res), std::move(null_map)); |
2663 | 93 | return Status::OK(); |
2664 | 93 | } |
2665 | | template <bool url_const, bool part_const> |
2666 | | static Status vector_parse(const ColumnString* url_col, |
2667 | | std::vector<UrlParser::UrlPart>& url_parts, const int size, |
2668 | | ColumnUInt8::Container& null_map_data, |
2669 | 64 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { |
2670 | 148 | for (size_t i = 0; i < size; ++i) { |
2671 | 84 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; |
2672 | 84 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); |
2673 | 84 | StringRef parse_res; |
2674 | 84 | if (UrlParser::parse_url(url_val, url_part, &parse_res)) { |
2675 | 64 | if (parse_res.empty()) [[unlikely]] { |
2676 | 4 | StringOP::push_empty_string(i, res_chars, res_offsets); |
2677 | 4 | continue; |
2678 | 4 | } |
2679 | 60 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, |
2680 | 60 | res_chars, res_offsets); |
2681 | 60 | } else { |
2682 | 20 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
2683 | 20 | } |
2684 | 84 | } |
2685 | 64 | return Status::OK(); |
2686 | 64 | } _ZN5doris22FunctionStringParseUrl12vector_parseILb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 2669 | 22 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { | 2670 | 64 | for (size_t i = 0; i < size; ++i) { | 2671 | 42 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 2672 | 42 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 2673 | 42 | StringRef parse_res; | 2674 | 42 | if (UrlParser::parse_url(url_val, url_part, &parse_res)) { | 2675 | 32 | if (parse_res.empty()) [[unlikely]] { | 2676 | 2 | StringOP::push_empty_string(i, res_chars, res_offsets); | 2677 | 2 | continue; | 2678 | 2 | } | 2679 | 30 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 2680 | 30 | res_chars, res_offsets); | 2681 | 30 | } else { | 2682 | 10 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 2683 | 10 | } | 2684 | 42 | } | 2685 | 22 | return Status::OK(); | 2686 | 22 | } |
_ZN5doris22FunctionStringParseUrl12vector_parseILb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 2669 | 21 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { | 2670 | 42 | for (size_t i = 0; i < size; ++i) { | 2671 | 21 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 2672 | 21 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 2673 | 21 | StringRef parse_res; | 2674 | 21 | if (UrlParser::parse_url(url_val, url_part, &parse_res)) { | 2675 | 16 | if (parse_res.empty()) [[unlikely]] { | 2676 | 1 | StringOP::push_empty_string(i, res_chars, res_offsets); | 2677 | 1 | continue; | 2678 | 1 | } | 2679 | 15 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 2680 | 15 | res_chars, res_offsets); | 2681 | 15 | } else { | 2682 | 5 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 2683 | 5 | } | 2684 | 21 | } | 2685 | 21 | return Status::OK(); | 2686 | 21 | } |
_ZN5doris22FunctionStringParseUrl12vector_parseILb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 2669 | 21 | ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) { | 2670 | 42 | for (size_t i = 0; i < size; ++i) { | 2671 | 21 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 2672 | 21 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 2673 | 21 | StringRef parse_res; | 2674 | 21 | if (UrlParser::parse_url(url_val, url_part, &parse_res)) { | 2675 | 16 | if (parse_res.empty()) [[unlikely]] { | 2676 | 1 | StringOP::push_empty_string(i, res_chars, res_offsets); | 2677 | 1 | continue; | 2678 | 1 | } | 2679 | 15 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 2680 | 15 | res_chars, res_offsets); | 2681 | 15 | } else { | 2682 | 5 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 2683 | 5 | } | 2684 | 21 | } | 2685 | 21 | return Status::OK(); | 2686 | 21 | } |
Unexecuted instantiation: _ZN5doris22FunctionStringParseUrl12vector_parseILb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE |
2687 | | template <bool url_const, bool part_const, bool key_const> |
2688 | | static Status vector_parse_key(const ColumnString* url_col, |
2689 | | std::vector<UrlParser::UrlPart>& url_parts, |
2690 | | const ColumnString* key_col, const int size, |
2691 | | ColumnUInt8::Container& null_map_data, |
2692 | | ColumnString::Chars& res_chars, |
2693 | 29 | ColumnString::Offsets& res_offsets) { |
2694 | 61 | for (size_t i = 0; i < size; ++i) { |
2695 | 32 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; |
2696 | 32 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); |
2697 | 32 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); |
2698 | 32 | StringRef parse_res; |
2699 | 32 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { |
2700 | 16 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, |
2701 | 16 | res_chars, res_offsets); |
2702 | 16 | } else { |
2703 | 16 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); |
2704 | 16 | continue; |
2705 | 16 | } |
2706 | 32 | } |
2707 | 29 | return Status::OK(); |
2708 | 29 | } _ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 2693 | 5 | ColumnString::Offsets& res_offsets) { | 2694 | 13 | for (size_t i = 0; i < size; ++i) { | 2695 | 8 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 2696 | 8 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 2697 | 8 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 2698 | 8 | StringRef parse_res; | 2699 | 8 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 2700 | 4 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 2701 | 4 | res_chars, res_offsets); | 2702 | 4 | } else { | 2703 | 4 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 2704 | 4 | continue; | 2705 | 4 | } | 2706 | 8 | } | 2707 | 5 | return Status::OK(); | 2708 | 5 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 2693 | 4 | ColumnString::Offsets& res_offsets) { | 2694 | 8 | for (size_t i = 0; i < size; ++i) { | 2695 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 2696 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 2697 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 2698 | 4 | StringRef parse_res; | 2699 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 2700 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 2701 | 2 | res_chars, res_offsets); | 2702 | 2 | } else { | 2703 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 2704 | 2 | continue; | 2705 | 2 | } | 2706 | 4 | } | 2707 | 4 | return Status::OK(); | 2708 | 4 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 2693 | 4 | ColumnString::Offsets& res_offsets) { | 2694 | 8 | for (size_t i = 0; i < size; ++i) { | 2695 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 2696 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 2697 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 2698 | 4 | StringRef parse_res; | 2699 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 2700 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 2701 | 2 | res_chars, res_offsets); | 2702 | 2 | } else { | 2703 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 2704 | 2 | continue; | 2705 | 2 | } | 2706 | 4 | } | 2707 | 4 | return Status::OK(); | 2708 | 4 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 2693 | 4 | ColumnString::Offsets& res_offsets) { | 2694 | 8 | for (size_t i = 0; i < size; ++i) { | 2695 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 2696 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 2697 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 2698 | 4 | StringRef parse_res; | 2699 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 2700 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 2701 | 2 | res_chars, res_offsets); | 2702 | 2 | } else { | 2703 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 2704 | 2 | continue; | 2705 | 2 | } | 2706 | 4 | } | 2707 | 4 | return Status::OK(); | 2708 | 4 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 2693 | 4 | ColumnString::Offsets& res_offsets) { | 2694 | 8 | for (size_t i = 0; i < size; ++i) { | 2695 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 2696 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 2697 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 2698 | 4 | StringRef parse_res; | 2699 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 2700 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 2701 | 2 | res_chars, res_offsets); | 2702 | 2 | } else { | 2703 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 2704 | 2 | continue; | 2705 | 2 | } | 2706 | 4 | } | 2707 | 4 | return Status::OK(); | 2708 | 4 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 2693 | 4 | ColumnString::Offsets& res_offsets) { | 2694 | 8 | for (size_t i = 0; i < size; ++i) { | 2695 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 2696 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 2697 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 2698 | 4 | StringRef parse_res; | 2699 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 2700 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 2701 | 2 | res_chars, res_offsets); | 2702 | 2 | } else { | 2703 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 2704 | 2 | continue; | 2705 | 2 | } | 2706 | 4 | } | 2707 | 4 | return Status::OK(); | 2708 | 4 | } |
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE Line | Count | Source | 2693 | 4 | ColumnString::Offsets& res_offsets) { | 2694 | 8 | for (size_t i = 0; i < size; ++i) { | 2695 | 4 | UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)]; | 2696 | 4 | StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i)); | 2697 | 4 | StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i)); | 2698 | 4 | StringRef parse_res; | 2699 | 4 | if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) { | 2700 | 2 | StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i, | 2701 | 2 | res_chars, res_offsets); | 2702 | 2 | } else { | 2703 | 2 | StringOP::push_null_string(i, res_chars, res_offsets, null_map_data); | 2704 | 2 | continue; | 2705 | 2 | } | 2706 | 4 | } | 2707 | 4 | return Status::OK(); | 2708 | 4 | } |
Unexecuted instantiation: _ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE |
2709 | | }; |
2710 | | |
2711 | | class FunctionUrlDecode : public IFunction { |
2712 | | public: |
2713 | | static constexpr auto name = "url_decode"; |
2714 | 8 | static FunctionPtr create() { return std::make_shared<FunctionUrlDecode>(); } |
2715 | 1 | String get_name() const override { return name; } |
2716 | 0 | size_t get_number_of_arguments() const override { return 1; } |
2717 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2718 | 0 | return std::make_shared<DataTypeString>(); |
2719 | 0 | } |
2720 | | |
2721 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2722 | 0 | uint32_t result, size_t input_rows_count) const override { |
2723 | 0 | auto res = ColumnString::create(); |
2724 | 0 | res->get_offsets().reserve(input_rows_count); |
2725 | |
|
2726 | 0 | const auto* url_col = |
2727 | 0 | assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get()); |
2728 | |
|
2729 | 0 | std::string decoded_url; |
2730 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
2731 | 0 | auto url = url_col->get_data_at(i); |
2732 | 0 | if (!url_decode(url.to_string(), &decoded_url)) { |
2733 | 0 | return Status::InternalError("Decode url failed"); |
2734 | 0 | } |
2735 | 0 | res->insert_data(decoded_url.data(), decoded_url.size()); |
2736 | 0 | decoded_url.clear(); |
2737 | 0 | } |
2738 | | |
2739 | 0 | block.get_by_position(result).column = std::move(res); |
2740 | 0 | return Status::OK(); |
2741 | 0 | } |
2742 | | }; |
2743 | | |
2744 | | class FunctionUrlEncode : public IFunction { |
2745 | | public: |
2746 | | static constexpr auto name = "url_encode"; |
2747 | 12 | static FunctionPtr create() { return std::make_shared<FunctionUrlEncode>(); } |
2748 | 1 | String get_name() const override { return name; } |
2749 | 4 | size_t get_number_of_arguments() const override { return 1; } |
2750 | 4 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2751 | 4 | return std::make_shared<DataTypeString>(); |
2752 | 4 | } |
2753 | | |
2754 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2755 | 4 | uint32_t result, size_t input_rows_count) const override { |
2756 | 4 | auto res = ColumnString::create(); |
2757 | 4 | res->get_offsets().reserve(input_rows_count); |
2758 | | |
2759 | 4 | const auto* url_col = |
2760 | 4 | assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get()); |
2761 | | |
2762 | 4 | std::string encoded_url; |
2763 | 10 | for (size_t i = 0; i < input_rows_count; ++i) { |
2764 | 6 | auto url = url_col->get_data_at(i); |
2765 | 6 | url_encode(url.to_string_view(), &encoded_url); |
2766 | 6 | res->insert_data(encoded_url.data(), encoded_url.size()); |
2767 | 6 | encoded_url.clear(); |
2768 | 6 | } |
2769 | | |
2770 | 4 | block.get_by_position(result).column = std::move(res); |
2771 | 4 | return Status::OK(); |
2772 | 4 | } |
2773 | | }; |
2774 | | |
2775 | | class FunctionRandomBytes : public IFunction { |
2776 | | public: |
2777 | | static constexpr auto name = "random_bytes"; |
2778 | 8 | static FunctionPtr create() { return std::make_shared<FunctionRandomBytes>(); } |
2779 | 1 | String get_name() const override { return name; } |
2780 | 0 | size_t get_number_of_arguments() const override { return 1; } |
2781 | 1 | bool is_variadic() const override { return false; } |
2782 | | |
2783 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2784 | 0 | return std::make_shared<DataTypeString>(); |
2785 | 0 | } |
2786 | | |
2787 | 0 | bool use_default_implementation_for_constants() const final { return false; } |
2788 | | |
2789 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2790 | 0 | uint32_t result, size_t input_rows_count) const override { |
2791 | 0 | auto res = ColumnString::create(); |
2792 | 0 | auto& res_offsets = res->get_offsets(); |
2793 | 0 | auto& res_chars = res->get_chars(); |
2794 | 0 | res_offsets.resize(input_rows_count); |
2795 | |
|
2796 | 0 | auto [arg_col, arg_const] = unpack_if_const(block.get_by_position(arguments[0]).column); |
2797 | 0 | const auto* length_col = assert_cast<const ColumnInt32*>(arg_col.get()); |
2798 | |
|
2799 | 0 | if (arg_const) { |
2800 | 0 | res_chars.reserve(input_rows_count * (length_col->get_element(0) + 2)); |
2801 | 0 | } |
2802 | |
|
2803 | 0 | std::vector<uint8_t, Allocator_<uint8_t>> random_bytes; |
2804 | 0 | std::random_device rd; |
2805 | 0 | std::mt19937 gen(rd()); |
2806 | |
|
2807 | 0 | std::uniform_int_distribution<unsigned short> distribution(0, 255); |
2808 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
2809 | 0 | size_t index = index_check_const(i, arg_const); |
2810 | 0 | if (length_col->get_element(index) < 0) [[unlikely]] { |
2811 | 0 | return Status::InvalidArgument("argument {} of function {} at row {} was invalid.", |
2812 | 0 | length_col->get_element(index), name, index); |
2813 | 0 | } |
2814 | 0 | random_bytes.resize(length_col->get_element(index)); |
2815 | |
|
2816 | 0 | for (auto& byte : random_bytes) { |
2817 | 0 | byte = distribution(gen) & 0xFF; |
2818 | 0 | } |
2819 | |
|
2820 | 0 | std::basic_ostringstream<char, std::char_traits<char>, Allocator_<char>> oss; |
2821 | 0 | for (const auto& byte : random_bytes) { |
2822 | 0 | oss << std::setw(2) << std::setfill('0') << std::hex << static_cast<int>(byte); |
2823 | 0 | } |
2824 | |
|
2825 | 0 | StringOP::push_value_string("0x" + oss.str(), i, res_chars, res_offsets); |
2826 | 0 | random_bytes.clear(); |
2827 | 0 | } |
2828 | | |
2829 | 0 | block.get_by_position(result).column = std::move(res); |
2830 | |
|
2831 | 0 | return Status::OK(); |
2832 | 0 | } |
2833 | | }; |
2834 | | |
2835 | | template <typename Impl> |
2836 | | class FunctionMoneyFormat : public IFunction { |
2837 | | public: |
2838 | | static constexpr auto name = "money_format"; |
2839 | 73 | static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }_ZN5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE6createEv Line | Count | Source | 2839 | 9 | static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); } |
_ZN5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE6createEv Line | Count | Source | 2839 | 9 | static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); } |
_ZN5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE6createEv Line | Count | Source | 2839 | 9 | static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); } |
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE6createEv Line | Count | Source | 2839 | 10 | static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); } |
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE6createEv Line | Count | Source | 2839 | 8 | static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); } |
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE6createEv Line | Count | Source | 2839 | 12 | static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); } |
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE6createEv Line | Count | Source | 2839 | 8 | static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); } |
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE6createEv Line | Count | Source | 2839 | 8 | static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); } |
|
2840 | 8 | String get_name() const override { return name; }_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE8get_nameB5cxx11Ev Line | Count | Source | 2840 | 1 | String get_name() const override { return name; } |
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE8get_nameB5cxx11Ev Line | Count | Source | 2840 | 1 | String get_name() const override { return name; } |
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE8get_nameB5cxx11Ev Line | Count | Source | 2840 | 1 | String get_name() const override { return name; } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE8get_nameB5cxx11Ev Line | Count | Source | 2840 | 1 | String get_name() const override { return name; } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE8get_nameB5cxx11Ev Line | Count | Source | 2840 | 1 | String get_name() const override { return name; } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE8get_nameB5cxx11Ev Line | Count | Source | 2840 | 1 | String get_name() const override { return name; } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE8get_nameB5cxx11Ev Line | Count | Source | 2840 | 1 | String get_name() const override { return name; } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE8get_nameB5cxx11Ev Line | Count | Source | 2840 | 1 | String get_name() const override { return name; } |
|
2841 | | |
2842 | 8 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
2843 | 8 | if (arguments.size() != 1) { |
2844 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, |
2845 | 0 | "Function {} requires exactly 1 argument", name); |
2846 | 0 | } |
2847 | | |
2848 | 8 | return std::make_shared<DataTypeString>(); |
2849 | 8 | } _ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 2842 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 2843 | 1 | if (arguments.size() != 1) { | 2844 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 2845 | 0 | "Function {} requires exactly 1 argument", name); | 2846 | 0 | } | 2847 | | | 2848 | 1 | return std::make_shared<DataTypeString>(); | 2849 | 1 | } |
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 2842 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 2843 | 1 | if (arguments.size() != 1) { | 2844 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 2845 | 0 | "Function {} requires exactly 1 argument", name); | 2846 | 0 | } | 2847 | | | 2848 | 1 | return std::make_shared<DataTypeString>(); | 2849 | 1 | } |
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 2842 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 2843 | 1 | if (arguments.size() != 1) { | 2844 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 2845 | 0 | "Function {} requires exactly 1 argument", name); | 2846 | 0 | } | 2847 | | | 2848 | 1 | return std::make_shared<DataTypeString>(); | 2849 | 1 | } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 2842 | 1 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 2843 | 1 | if (arguments.size() != 1) { | 2844 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 2845 | 0 | "Function {} requires exactly 1 argument", name); | 2846 | 0 | } | 2847 | | | 2848 | 1 | return std::make_shared<DataTypeString>(); | 2849 | 1 | } |
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Line | Count | Source | 2842 | 4 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 2843 | 4 | if (arguments.size() != 1) { | 2844 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 2845 | 0 | "Function {} requires exactly 1 argument", name); | 2846 | 0 | } | 2847 | | | 2848 | 4 | return std::make_shared<DataTypeString>(); | 2849 | 4 | } |
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE |
2850 | 56 | DataTypes get_variadic_argument_types_impl() const override { |
2851 | 56 | return Impl::get_variadic_argument_types(); |
2852 | 56 | } _ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE32get_variadic_argument_types_implEv Line | Count | Source | 2850 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 2851 | 7 | return Impl::get_variadic_argument_types(); | 2852 | 7 | } |
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE32get_variadic_argument_types_implEv Line | Count | Source | 2850 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 2851 | 7 | return Impl::get_variadic_argument_types(); | 2852 | 7 | } |
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE32get_variadic_argument_types_implEv Line | Count | Source | 2850 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 2851 | 7 | return Impl::get_variadic_argument_types(); | 2852 | 7 | } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE32get_variadic_argument_types_implEv Line | Count | Source | 2850 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 2851 | 7 | return Impl::get_variadic_argument_types(); | 2852 | 7 | } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE32get_variadic_argument_types_implEv Line | Count | Source | 2850 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 2851 | 7 | return Impl::get_variadic_argument_types(); | 2852 | 7 | } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE32get_variadic_argument_types_implEv Line | Count | Source | 2850 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 2851 | 7 | return Impl::get_variadic_argument_types(); | 2852 | 7 | } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE32get_variadic_argument_types_implEv Line | Count | Source | 2850 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 2851 | 7 | return Impl::get_variadic_argument_types(); | 2852 | 7 | } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE32get_variadic_argument_types_implEv Line | Count | Source | 2850 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 2851 | 7 | return Impl::get_variadic_argument_types(); | 2852 | 7 | } |
|
2853 | 8 | size_t get_number_of_arguments() const override { return 1; }_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE23get_number_of_argumentsEv Line | Count | Source | 2853 | 1 | size_t get_number_of_arguments() const override { return 1; } |
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE23get_number_of_argumentsEv Line | Count | Source | 2853 | 1 | size_t get_number_of_arguments() const override { return 1; } |
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE23get_number_of_argumentsEv Line | Count | Source | 2853 | 1 | size_t get_number_of_arguments() const override { return 1; } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE23get_number_of_argumentsEv Line | Count | Source | 2853 | 1 | size_t get_number_of_arguments() const override { return 1; } |
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE23get_number_of_argumentsEv _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE23get_number_of_argumentsEv Line | Count | Source | 2853 | 4 | size_t get_number_of_arguments() const override { return 1; } |
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE23get_number_of_argumentsEv |
2854 | | |
2855 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
2856 | 8 | uint32_t result, size_t input_rows_count) const override { |
2857 | 8 | auto res_column = ColumnString::create(); |
2858 | 8 | ColumnPtr argument_column = block.get_by_position(arguments[0]).column; |
2859 | | |
2860 | 8 | auto result_column = assert_cast<ColumnString*>(res_column.get()); |
2861 | | |
2862 | 8 | Impl::execute(context, result_column, argument_column, input_rows_count); |
2863 | | |
2864 | 8 | block.replace_by_position(result, std::move(res_column)); |
2865 | 8 | return Status::OK(); |
2866 | 8 | } _ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 2856 | 1 | uint32_t result, size_t input_rows_count) const override { | 2857 | 1 | auto res_column = ColumnString::create(); | 2858 | 1 | ColumnPtr argument_column = block.get_by_position(arguments[0]).column; | 2859 | | | 2860 | 1 | auto result_column = assert_cast<ColumnString*>(res_column.get()); | 2861 | | | 2862 | 1 | Impl::execute(context, result_column, argument_column, input_rows_count); | 2863 | | | 2864 | 1 | block.replace_by_position(result, std::move(res_column)); | 2865 | 1 | return Status::OK(); | 2866 | 1 | } |
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 2856 | 1 | uint32_t result, size_t input_rows_count) const override { | 2857 | 1 | auto res_column = ColumnString::create(); | 2858 | 1 | ColumnPtr argument_column = block.get_by_position(arguments[0]).column; | 2859 | | | 2860 | 1 | auto result_column = assert_cast<ColumnString*>(res_column.get()); | 2861 | | | 2862 | 1 | Impl::execute(context, result_column, argument_column, input_rows_count); | 2863 | | | 2864 | 1 | block.replace_by_position(result, std::move(res_column)); | 2865 | 1 | return Status::OK(); | 2866 | 1 | } |
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 2856 | 1 | uint32_t result, size_t input_rows_count) const override { | 2857 | 1 | auto res_column = ColumnString::create(); | 2858 | 1 | ColumnPtr argument_column = block.get_by_position(arguments[0]).column; | 2859 | | | 2860 | 1 | auto result_column = assert_cast<ColumnString*>(res_column.get()); | 2861 | | | 2862 | 1 | Impl::execute(context, result_column, argument_column, input_rows_count); | 2863 | | | 2864 | 1 | block.replace_by_position(result, std::move(res_column)); | 2865 | 1 | return Status::OK(); | 2866 | 1 | } |
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 2856 | 2 | uint32_t result, size_t input_rows_count) const override { | 2857 | 2 | auto res_column = ColumnString::create(); | 2858 | 2 | ColumnPtr argument_column = block.get_by_position(arguments[0]).column; | 2859 | | | 2860 | 2 | auto result_column = assert_cast<ColumnString*>(res_column.get()); | 2861 | | | 2862 | 2 | Impl::execute(context, result_column, argument_column, input_rows_count); | 2863 | | | 2864 | 2 | block.replace_by_position(result, std::move(res_column)); | 2865 | 2 | return Status::OK(); | 2866 | 2 | } |
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 2856 | 3 | uint32_t result, size_t input_rows_count) const override { | 2857 | 3 | auto res_column = ColumnString::create(); | 2858 | 3 | ColumnPtr argument_column = block.get_by_position(arguments[0]).column; | 2859 | | | 2860 | 3 | auto result_column = assert_cast<ColumnString*>(res_column.get()); | 2861 | | | 2862 | 3 | Impl::execute(context, result_column, argument_column, input_rows_count); | 2863 | | | 2864 | 3 | block.replace_by_position(result, std::move(res_column)); | 2865 | 3 | return Status::OK(); | 2866 | 3 | } |
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
2867 | | }; |
2868 | | |
// ----------------------------------------------------------------------
// SimpleItoaWithCommas()
//    Description: renders an integer as decimal text with a ',' between
//    every group of three digits, e.g. -1234567 -> "-1,234,567".
//    Faster than printf("%d")?
//
//    The text is written right-to-left so that it ends at
//    buffer + buffer_size. No terminating NUL is written and buffer_size
//    is not checked against the number of characters produced.
//
//    Return value: pointer to the first character of the text
// ----------------------------------------------------------------------
template <typename T>
char* SimpleItoaWithCommas(T i, char* buffer, int32_t buffer_size) {
    using Magnitude = std::make_unsigned_t<T>;

    const bool negative = i < 0;
    // Work on the unsigned magnitude so that negating the most negative value
    // wraps around instead of overflowing the signed type.
    Magnitude remaining = static_cast<Magnitude>(i);
    if (negative) {
        remaining = Magnitude {0} - remaining;
    }

    char* cursor = buffer + buffer_size;
    int digits_in_group = 0;
    // do/while so that the value 0 still produces the single digit "0".
    do {
        if (digits_in_group == 3) {
            // A fourth digit is about to be written: separate it from the full group.
            *--cursor = ',';
            digits_in_group = 0;
        }
        *--cursor = static_cast<char>('0' + static_cast<int>(remaining % 10));
        remaining /= 10;
        ++digits_in_group;
    } while (remaining != 0);

    if (negative) {
        *--cursor = '-';
    }
    return cursor;
}
|
2910 | | |
2911 | | namespace MoneyFormat { |
2912 | | |
// Buffer length for formatting a Decimal(9, 0) value.
constexpr size_t MAX_FORMAT_LEN_DEC32() {
    constexpr size_t digits = 9;
    // 1 + digits + one slot per three digits + 3 -- presumably sign, digits, commas and
    // ".xx"; TODO confirm. The sum is doubled as a safety margin.
    return (1 + digits + digits / 3 + 3) * 2;
}
2918 | | |
// Buffer length for formatting a Decimal(18, 0) value.
constexpr size_t MAX_FORMAT_LEN_DEC64() {
    constexpr size_t digits = 18;
    // 1 + digits + one slot per three digits + 3 -- presumably sign, digits, commas and
    // ".xx"; TODO confirm. The sum is doubled as a safety margin.
    return (1 + digits + digits / 3 + 3) * 2;
}
2924 | | |
// Buffer length for formatting a DecimalV2 value, which has at most 27 digits.
constexpr size_t MAX_FORMAT_LEN_DEC128V2() {
    constexpr size_t digits = 27;
    // 1 + digits + one slot per three digits + 3 -- presumably sign, digits, commas and
    // ".xx"; TODO confirm. The sum is doubled as a safety margin.
    return (1 + digits + digits / 3 + 3) * 2;
}
2930 | | |
// Buffer length for formatting a Decimal(38, 0) value.
constexpr size_t MAX_FORMAT_LEN_DEC128V3() {
    // NOTE(review): 39 digits are budgeted although the precision is 38 -- confirm
    // whether the extra digit is intentional.
    constexpr size_t digits = 39;
    // 1 + digits + one slot per three digits + 3 -- presumably sign, digits, commas and
    // ".xx"; TODO confirm. The sum is doubled as a safety margin.
    return (1 + digits + digits / 3 + 3) * 2;
}
2936 | | |
// Buffer length for formatting a 64-bit integer.
// INT64_MIN = -9223372036854775808 has 19 digits; 20 are budgeted here.
constexpr size_t MAX_FORMAT_LEN_INT64() {
    constexpr size_t digits = 20;
    // 1 + digits + one slot per three digits + 3 -- presumably sign, digits, commas and
    // ".xx"; TODO confirm. The sum is doubled as a safety margin.
    return (1 + digits + digits / 3 + 3) * 2;
}
2942 | | |
// Buffer length for formatting a 128-bit integer.
// INT128_MIN = -170141183460469231731687303715884105728 has 39 digits.
constexpr size_t MAX_FORMAT_LEN_INT128() {
    constexpr size_t digits = 39;
    // 1 + digits + one slot per three digits + 3 -- presumably sign, digits, commas and
    // ".xx"; TODO confirm. The sum is doubled as a safety margin.
    return (1 + digits + digits / 3 + 3) * 2;
}
2947 | | |
2948 | | template <typename T, size_t N> |
2949 | 25 | StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) { |
2950 | 25 | static_assert(std::is_integral<T>::value); |
2951 | 25 | const bool is_negative = int_value < 0 || frac_value < 0; |
2952 | | |
2953 | | // do round to frac_part |
2954 | | // magic number 2: since we need to round frac_part to 2 digits |
2955 | 25 | if (scale > 2) { |
2956 | 19 | DCHECK(scale <= 38); |
2957 | | // do rounding, so we need to reserve 3 digits. |
2958 | 19 | auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3))); |
2959 | | // do devide first to avoid overflow |
2960 | | // after round frac_value will be positive by design. |
2961 | 19 | frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5; |
2962 | 19 | frac_value /= 10; |
2963 | 19 | } else if (scale < 2) { |
2964 | 6 | DCHECK(frac_value < 100); |
2965 | | // since scale <= 2, overflow is impossiable |
2966 | 6 | frac_value = frac_value * common::exp10_i32(2 - scale); |
2967 | 6 | } |
2968 | | |
2969 | 25 | if (frac_value == 100) { |
2970 | 3 | if (is_negative) { |
2971 | 2 | int_value -= 1; |
2972 | 2 | } else { |
2973 | 1 | int_value += 1; |
2974 | 1 | } |
2975 | 3 | frac_value = 0; |
2976 | 3 | } |
2977 | | |
2978 | 25 | bool append_sign_manually = false; |
2979 | 25 | if (is_negative && int_value == 0) { |
2980 | | // when int_value is 0, result of SimpleItoaWithCommas will contains just zero |
2981 | | // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded. |
2982 | | // this is why we introduce argument append_sing_manually. |
2983 | 2 | append_sign_manually = true; |
2984 | 2 | } |
2985 | | |
2986 | 25 | char local[N]; |
2987 | 25 | char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local)); |
2988 | 25 | const Int32 integer_str_len = N - (p - local); |
2989 | 25 | const Int32 frac_str_len = 2; |
2990 | 25 | const Int32 whole_decimal_str_len = |
2991 | 25 | (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len; |
2992 | | |
2993 | 25 | StringRef result = context->create_temp_string_val(whole_decimal_str_len); |
2994 | | // Modify a string passed via stringref |
2995 | 25 | char* result_data = const_cast<char*>(result.data); |
2996 | | |
2997 | 25 | if (append_sign_manually) { |
2998 | 2 | memset(result_data, '-', 1); |
2999 | 2 | } |
3000 | | |
3001 | 25 | memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len); |
3002 | 25 | *(result_data + whole_decimal_str_len - 3) = '.'; |
3003 | 25 | *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10)); |
3004 | 25 | *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10)); |
3005 | 25 | return result; |
3006 | 25 | }; _ZN5doris11MoneyFormat15do_money_formatIlLm60EEENS_9StringRefEPNS_15FunctionContextEjT_S5_ Line | Count | Source | 2949 | 3 | StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) { | 2950 | 3 | static_assert(std::is_integral<T>::value); | 2951 | 3 | const bool is_negative = int_value < 0 || frac_value < 0; | 2952 | | | 2953 | | // do round to frac_part | 2954 | | // magic number 2: since we need to round frac_part to 2 digits | 2955 | 3 | if (scale > 2) { | 2956 | 0 | DCHECK(scale <= 38); | 2957 | | // do rounding, so we need to reserve 3 digits. | 2958 | 0 | auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3))); | 2959 | | // do devide first to avoid overflow | 2960 | | // after round frac_value will be positive by design. | 2961 | 0 | frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5; | 2962 | 0 | frac_value /= 10; | 2963 | 3 | } else if (scale < 2) { | 2964 | 3 | DCHECK(frac_value < 100); | 2965 | | // since scale <= 2, overflow is impossiable | 2966 | 3 | frac_value = frac_value * common::exp10_i32(2 - scale); | 2967 | 3 | } | 2968 | | | 2969 | 3 | if (frac_value == 100) { | 2970 | 0 | if (is_negative) { | 2971 | 0 | int_value -= 1; | 2972 | 0 | } else { | 2973 | 0 | int_value += 1; | 2974 | 0 | } | 2975 | 0 | frac_value = 0; | 2976 | 0 | } | 2977 | | | 2978 | 3 | bool append_sign_manually = false; | 2979 | 3 | if (is_negative && int_value == 0) { | 2980 | | // when int_value is 0, result of SimpleItoaWithCommas will contains just zero | 2981 | | // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded. | 2982 | | // this is why we introduce argument append_sing_manually. 
| 2983 | 0 | append_sign_manually = true; | 2984 | 0 | } | 2985 | | | 2986 | 3 | char local[N]; | 2987 | 3 | char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local)); | 2988 | 3 | const Int32 integer_str_len = N - (p - local); | 2989 | 3 | const Int32 frac_str_len = 2; | 2990 | 3 | const Int32 whole_decimal_str_len = | 2991 | 3 | (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len; | 2992 | | | 2993 | 3 | StringRef result = context->create_temp_string_val(whole_decimal_str_len); | 2994 | | // Modify a string passed via stringref | 2995 | 3 | char* result_data = const_cast<char*>(result.data); | 2996 | | | 2997 | 3 | if (append_sign_manually) { | 2998 | 0 | memset(result_data, '-', 1); | 2999 | 0 | } | 3000 | | | 3001 | 3 | memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len); | 3002 | 3 | *(result_data + whole_decimal_str_len - 3) = '.'; | 3003 | 3 | *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10)); | 3004 | 3 | *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10)); | 3005 | 3 | return result; | 3006 | 3 | }; |
_ZN5doris11MoneyFormat15do_money_formatInLm112EEENS_9StringRefEPNS_15FunctionContextEjT_S5_ Line | Count | Source | 2949 | 3 | StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) { | 2950 | 3 | static_assert(std::is_integral<T>::value); | 2951 | 3 | const bool is_negative = int_value < 0 || frac_value < 0; | 2952 | | | 2953 | | // do round to frac_part | 2954 | | // magic number 2: since we need to round frac_part to 2 digits | 2955 | 3 | if (scale > 2) { | 2956 | 0 | DCHECK(scale <= 38); | 2957 | | // do rounding, so we need to reserve 3 digits. | 2958 | 0 | auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3))); | 2959 | | // do devide first to avoid overflow | 2960 | | // after round frac_value will be positive by design. | 2961 | 0 | frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5; | 2962 | 0 | frac_value /= 10; | 2963 | 3 | } else if (scale < 2) { | 2964 | 3 | DCHECK(frac_value < 100); | 2965 | | // since scale <= 2, overflow is impossiable | 2966 | 3 | frac_value = frac_value * common::exp10_i32(2 - scale); | 2967 | 3 | } | 2968 | | | 2969 | 3 | if (frac_value == 100) { | 2970 | 0 | if (is_negative) { | 2971 | 0 | int_value -= 1; | 2972 | 0 | } else { | 2973 | 0 | int_value += 1; | 2974 | 0 | } | 2975 | 0 | frac_value = 0; | 2976 | 0 | } | 2977 | | | 2978 | 3 | bool append_sign_manually = false; | 2979 | 3 | if (is_negative && int_value == 0) { | 2980 | | // when int_value is 0, result of SimpleItoaWithCommas will contains just zero | 2981 | | // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded. | 2982 | | // this is why we introduce argument append_sing_manually. 
| 2983 | 0 | append_sign_manually = true; | 2984 | 0 | } | 2985 | | | 2986 | 3 | char local[N]; | 2987 | 3 | char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local)); | 2988 | 3 | const Int32 integer_str_len = N - (p - local); | 2989 | 3 | const Int32 frac_str_len = 2; | 2990 | 3 | const Int32 whole_decimal_str_len = | 2991 | 3 | (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len; | 2992 | | | 2993 | 3 | StringRef result = context->create_temp_string_val(whole_decimal_str_len); | 2994 | | // Modify a string passed via stringref | 2995 | 3 | char* result_data = const_cast<char*>(result.data); | 2996 | | | 2997 | 3 | if (append_sign_manually) { | 2998 | 0 | memset(result_data, '-', 1); | 2999 | 0 | } | 3000 | | | 3001 | 3 | memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len); | 3002 | 3 | *(result_data + whole_decimal_str_len - 3) = '.'; | 3003 | 3 | *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10)); | 3004 | 3 | *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10)); | 3005 | 3 | return result; | 3006 | 3 | }; |
_ZN5doris11MoneyFormat15do_money_formatInLm80EEENS_9StringRefEPNS_15FunctionContextEjT_S5_ Line | Count | Source | 2949 | 14 | StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) { | 2950 | 14 | static_assert(std::is_integral<T>::value); | 2951 | 14 | const bool is_negative = int_value < 0 || frac_value < 0; | 2952 | | | 2953 | | // do round to frac_part | 2954 | | // magic number 2: since we need to round frac_part to 2 digits | 2955 | 14 | if (scale > 2) { | 2956 | 14 | DCHECK(scale <= 38); | 2957 | | // do rounding, so we need to reserve 3 digits. | 2958 | 14 | auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3))); | 2959 | | // do devide first to avoid overflow | 2960 | | // after round frac_value will be positive by design. | 2961 | 14 | frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5; | 2962 | 14 | frac_value /= 10; | 2963 | 14 | } else if (scale < 2) { | 2964 | 0 | DCHECK(frac_value < 100); | 2965 | | // since scale <= 2, overflow is impossiable | 2966 | 0 | frac_value = frac_value * common::exp10_i32(2 - scale); | 2967 | 0 | } | 2968 | | | 2969 | 14 | if (frac_value == 100) { | 2970 | 3 | if (is_negative) { | 2971 | 2 | int_value -= 1; | 2972 | 2 | } else { | 2973 | 1 | int_value += 1; | 2974 | 1 | } | 2975 | 3 | frac_value = 0; | 2976 | 3 | } | 2977 | | | 2978 | 14 | bool append_sign_manually = false; | 2979 | 14 | if (is_negative && int_value == 0) { | 2980 | | // when int_value is 0, result of SimpleItoaWithCommas will contains just zero | 2981 | | // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded. | 2982 | | // this is why we introduce argument append_sing_manually. 
| 2983 | 2 | append_sign_manually = true; | 2984 | 2 | } | 2985 | | | 2986 | 14 | char local[N]; | 2987 | 14 | char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local)); | 2988 | 14 | const Int32 integer_str_len = N - (p - local); | 2989 | 14 | const Int32 frac_str_len = 2; | 2990 | 14 | const Int32 whole_decimal_str_len = | 2991 | 14 | (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len; | 2992 | | | 2993 | 14 | StringRef result = context->create_temp_string_val(whole_decimal_str_len); | 2994 | | // Modify a string passed via stringref | 2995 | 14 | char* result_data = const_cast<char*>(result.data); | 2996 | | | 2997 | 14 | if (append_sign_manually) { | 2998 | 2 | memset(result_data, '-', 1); | 2999 | 2 | } | 3000 | | | 3001 | 14 | memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len); | 3002 | 14 | *(result_data + whole_decimal_str_len - 3) = '.'; | 3003 | 14 | *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10)); | 3004 | 14 | *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10)); | 3005 | 14 | return result; | 3006 | 14 | }; |
Unexecuted instantiation: _ZN5doris11MoneyFormat15do_money_formatIlLm32EEENS_9StringRefEPNS_15FunctionContextEjT_S5_ _ZN5doris11MoneyFormat15do_money_formatIlLm56EEENS_9StringRefEPNS_15FunctionContextEjT_S5_ Line | Count | Source | 2949 | 5 | StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) { | 2950 | 5 | static_assert(std::is_integral<T>::value); | 2951 | 5 | const bool is_negative = int_value < 0 || frac_value < 0; | 2952 | | | 2953 | | // do round to frac_part | 2954 | | // magic number 2: since we need to round frac_part to 2 digits | 2955 | 5 | if (scale > 2) { | 2956 | 5 | DCHECK(scale <= 38); | 2957 | | // do rounding, so we need to reserve 3 digits. | 2958 | 5 | auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3))); | 2959 | | // do devide first to avoid overflow | 2960 | | // after round frac_value will be positive by design. | 2961 | 5 | frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5; | 2962 | 5 | frac_value /= 10; | 2963 | 5 | } else if (scale < 2) { | 2964 | 0 | DCHECK(frac_value < 100); | 2965 | | // since scale <= 2, overflow is impossiable | 2966 | 0 | frac_value = frac_value * common::exp10_i32(2 - scale); | 2967 | 0 | } | 2968 | | | 2969 | 5 | if (frac_value == 100) { | 2970 | 0 | if (is_negative) { | 2971 | 0 | int_value -= 1; | 2972 | 0 | } else { | 2973 | 0 | int_value += 1; | 2974 | 0 | } | 2975 | 0 | frac_value = 0; | 2976 | 0 | } | 2977 | | | 2978 | 5 | bool append_sign_manually = false; | 2979 | 5 | if (is_negative && int_value == 0) { | 2980 | | // when int_value is 0, result of SimpleItoaWithCommas will contains just zero | 2981 | | // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded. | 2982 | | // this is why we introduce argument append_sing_manually. 
| 2983 | 0 | append_sign_manually = true; | 2984 | 0 | } | 2985 | | | 2986 | 5 | char local[N]; | 2987 | 5 | char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local)); | 2988 | 5 | const Int32 integer_str_len = N - (p - local); | 2989 | 5 | const Int32 frac_str_len = 2; | 2990 | 5 | const Int32 whole_decimal_str_len = | 2991 | 5 | (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len; | 2992 | | | 2993 | 5 | StringRef result = context->create_temp_string_val(whole_decimal_str_len); | 2994 | | // Modify a string passed via stringref | 2995 | 5 | char* result_data = const_cast<char*>(result.data); | 2996 | | | 2997 | 5 | if (append_sign_manually) { | 2998 | 0 | memset(result_data, '-', 1); | 2999 | 0 | } | 3000 | | | 3001 | 5 | memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len); | 3002 | 5 | *(result_data + whole_decimal_str_len - 3) = '.'; | 3003 | 5 | *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10)); | 3004 | 5 | *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10)); | 3005 | 5 | return result; | 3006 | 5 | }; |
|
3007 | | |
3008 | | // Note string value must be valid decimal string which contains two digits after the decimal point |
3009 | 4 | static StringRef do_money_format(FunctionContext* context, const std::string& value) { |
3010 | 4 | bool is_positive = (value[0] != '-'); |
3011 | 4 | int32_t result_len = value.size() + (value.size() - (is_positive ? 4 : 5)) / 3; |
3012 | 4 | StringRef result = context->create_temp_string_val(result_len); |
3013 | | // Modify a string passed via stringref |
3014 | 4 | char* result_data = const_cast<char*>(result.data); |
3015 | 4 | if (!is_positive) { |
3016 | 2 | *result_data = '-'; |
3017 | 2 | } |
3018 | 10 | for (int i = value.size() - 4, j = result_len - 4; i >= 0; i = i - 3) { |
3019 | 9 | *(result_data + j) = *(value.data() + i); |
3020 | 9 | if (i - 1 < 0) { |
3021 | 2 | break; |
3022 | 2 | } |
3023 | 7 | *(result_data + j - 1) = *(value.data() + i - 1); |
3024 | 7 | if (i - 2 < 0) { |
3025 | 1 | break; |
3026 | 1 | } |
3027 | 6 | *(result_data + j - 2) = *(value.data() + i - 2); |
3028 | 6 | if (j - 3 > 1 || (j - 3 == 1 && is_positive)) { |
3029 | 4 | *(result_data + j - 3) = ','; |
3030 | 4 | j -= 4; |
3031 | 4 | } else { |
3032 | 2 | j -= 3; |
3033 | 2 | } |
3034 | 6 | } |
3035 | 4 | memcpy(result_data + result_len - 3, value.data() + value.size() - 3, 3); |
3036 | 4 | return result; |
3037 | 4 | }; function_string.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Line | Count | Source | 3009 | 4 | static StringRef do_money_format(FunctionContext* context, const std::string& value) { | 3010 | 4 | bool is_positive = (value[0] != '-'); | 3011 | 4 | int32_t result_len = value.size() + (value.size() - (is_positive ? 4 : 5)) / 3; | 3012 | 4 | StringRef result = context->create_temp_string_val(result_len); | 3013 | | // Modify a string passed via stringref | 3014 | 4 | char* result_data = const_cast<char*>(result.data); | 3015 | 4 | if (!is_positive) { | 3016 | 2 | *result_data = '-'; | 3017 | 2 | } | 3018 | 10 | for (int i = value.size() - 4, j = result_len - 4; i >= 0; i = i - 3) { | 3019 | 9 | *(result_data + j) = *(value.data() + i); | 3020 | 9 | if (i - 1 < 0) { | 3021 | 2 | break; | 3022 | 2 | } | 3023 | 7 | *(result_data + j - 1) = *(value.data() + i - 1); | 3024 | 7 | if (i - 2 < 0) { | 3025 | 1 | break; | 3026 | 1 | } | 3027 | 6 | *(result_data + j - 2) = *(value.data() + i - 2); | 3028 | 6 | if (j - 3 > 1 || (j - 3 == 1 && is_positive)) { | 3029 | 4 | *(result_data + j - 3) = ','; | 3030 | 4 | j -= 4; | 3031 | 4 | } else { | 3032 | 2 | j -= 3; | 3033 | 2 | } | 3034 | 6 | } | 3035 | 4 | memcpy(result_data + result_len - 3, value.data() + value.size() - 3, 3); | 3036 | 4 | return result; | 3037 | 4 | }; |
Unexecuted instantiation: function_split_by_regexp.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: pipeline_fragment_context.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: iceberg_table_sink_operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: viceberg_table_writer.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: partition_transformers.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE Unexecuted instantiation: spill_iceberg_table_sink_operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE |
3038 | | |
3039 | | } // namespace MoneyFormat |
3040 | | |
3041 | | namespace FormatRound { |
3042 | | |
3043 | 0 | constexpr size_t MAX_FORMAT_LEN_DEC32() { |
3044 | 0 | // Decimal(9, 0) |
3045 | 0 | // Double the size to avoid some unexpected bug. |
3046 | 0 | return 2 * (1 + 9 + (9 / 3) + 3); |
3047 | 0 | } |
3048 | | |
3049 | 0 | constexpr size_t MAX_FORMAT_LEN_DEC64() { |
3050 | 0 | // Decimal(18, 0) |
3051 | 0 | // Double the size to avoid some unexpected bug. |
3052 | 0 | return 2 * (1 + 18 + (18 / 3) + 3); |
3053 | 0 | } |
3054 | | |
3055 | 0 | constexpr size_t MAX_FORMAT_LEN_DEC128V2() { |
3056 | 0 | // DecimalV2 has at most 27 digits |
3057 | 0 | // Double the size to avoid some unexpected bug. |
3058 | 0 | return 2 * (1 + 27 + (27 / 3) + 3); |
3059 | 0 | } |
3060 | | |
3061 | 0 | constexpr size_t MAX_FORMAT_LEN_DEC128V3() { |
3062 | 0 | // Decimal(38, 0) |
3063 | 0 | // Double the size to avoid some unexpected bug. |
3064 | 0 | return 2 * (1 + 39 + (39 / 3) + 3); |
3065 | 0 | } |
3066 | | |
3067 | 0 | constexpr size_t MAX_FORMAT_LEN_INT64() { |
3068 | 0 | // INT_MIN = -9223372036854775807 |
3069 | 0 | // Double the size to avoid some unexpected bug. |
3070 | 0 | return 2 * (1 + 20 + (20 / 3) + 3); |
3071 | 0 | } |
3072 | | |
3073 | 0 | constexpr size_t MAX_FORMAT_LEN_INT128() { |
3074 | 0 | // INT128_MIN = -170141183460469231731687303715884105728 |
3075 | 0 | return 2 * (1 + 39 + (39 / 3) + 3); |
3076 | 0 | } |
3077 | | |
3078 | | template <typename T, size_t N> |
3079 | | StringRef do_format_round(FunctionContext* context, UInt32 scale, T int_value, T frac_value, |
3080 | 31 | Int32 decimal_places) { |
3081 | 31 | static_assert(std::is_integral<T>::value); |
3082 | 31 | const bool is_negative = int_value < 0 || frac_value < 0; |
3083 | | |
3084 | | // do round to frac_part based on decimal_places |
3085 | 31 | if (scale > decimal_places && decimal_places > 0) { |
3086 | 14 | DCHECK(scale <= 38); |
3087 | | // do rounding, so we need to reserve decimal_places + 1 digits |
3088 | 14 | auto multiplier = |
3089 | 14 | common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1)))); |
3090 | | // do divide first to avoid overflow |
3091 | | // after round frac_value will be positive by design |
3092 | 14 | frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5; |
3093 | 14 | frac_value /= 10; |
3094 | 17 | } else if (scale < decimal_places && decimal_places > 0) { |
3095 | | // since scale <= decimal_places, overflow is impossible |
3096 | 13 | frac_value = frac_value * common::exp10_i32(decimal_places - scale); |
3097 | 13 | } |
3098 | | |
3099 | | // Calculate power of 10 for decimal_places |
3100 | 31 | T decimal_power = common::exp10_i32(decimal_places); |
3101 | 31 | if (frac_value == decimal_power) { |
3102 | 0 | if (is_negative) { |
3103 | 0 | int_value -= 1; |
3104 | 0 | } else { |
3105 | 0 | int_value += 1; |
3106 | 0 | } |
3107 | 0 | frac_value = 0; |
3108 | 0 | } |
3109 | | |
3110 | 31 | bool append_sign_manually = false; |
3111 | 31 | if (is_negative && int_value == 0) { |
3112 | 0 | append_sign_manually = true; |
3113 | 0 | } |
3114 | | |
3115 | 31 | char local[N]; |
3116 | 31 | char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local)); |
3117 | 31 | const Int32 integer_str_len = N - (p - local); |
3118 | 31 | const Int32 frac_str_len = decimal_places; |
3119 | 31 | const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len + |
3120 | 31 | (decimal_places > 0 ? 1 : 0) + frac_str_len; |
3121 | | |
3122 | 31 | StringRef result = context->create_temp_string_val(whole_decimal_str_len); |
3123 | | // Modify a string passed via stringref |
3124 | 31 | char* result_data = const_cast<char*>(result.data); |
3125 | | |
3126 | 31 | if (append_sign_manually) { |
3127 | 0 | memset(result_data, '-', 1); |
3128 | 0 | } |
3129 | | |
3130 | 31 | memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len); |
3131 | 31 | if (decimal_places > 0) { |
3132 | 27 | *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.'; |
3133 | 27 | } |
3134 | | |
3135 | | // Convert fractional part to string with proper padding |
3136 | 31 | T remaining_frac = std::abs(static_cast<int>(frac_value)); |
3137 | 139 | for (int i = 0; i <= decimal_places - 1; ++i) { |
3138 | 108 | *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10); |
3139 | 108 | remaining_frac /= 10; |
3140 | 108 | } |
3141 | 31 | return result; |
3142 | 31 | } _ZN5doris11FormatRound15do_format_roundIlLm60EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i Line | Count | Source | 3080 | 8 | Int32 decimal_places) { | 3081 | 8 | static_assert(std::is_integral<T>::value); | 3082 | 8 | const bool is_negative = int_value < 0 || frac_value < 0; | 3083 | | | 3084 | | // do round to frac_part based on decimal_places | 3085 | 8 | if (scale > decimal_places && decimal_places > 0) { | 3086 | 0 | DCHECK(scale <= 38); | 3087 | | // do rounding, so we need to reserve decimal_places + 1 digits | 3088 | 0 | auto multiplier = | 3089 | 0 | common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1)))); | 3090 | | // do divide first to avoid overflow | 3091 | | // after round frac_value will be positive by design | 3092 | 0 | frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5; | 3093 | 0 | frac_value /= 10; | 3094 | 8 | } else if (scale < decimal_places && decimal_places > 0) { | 3095 | | // since scale <= decimal_places, overflow is impossible | 3096 | 6 | frac_value = frac_value * common::exp10_i32(decimal_places - scale); | 3097 | 6 | } | 3098 | | | 3099 | | // Calculate power of 10 for decimal_places | 3100 | 8 | T decimal_power = common::exp10_i32(decimal_places); | 3101 | 8 | if (frac_value == decimal_power) { | 3102 | 0 | if (is_negative) { | 3103 | 0 | int_value -= 1; | 3104 | 0 | } else { | 3105 | 0 | int_value += 1; | 3106 | 0 | } | 3107 | 0 | frac_value = 0; | 3108 | 0 | } | 3109 | | | 3110 | 8 | bool append_sign_manually = false; | 3111 | 8 | if (is_negative && int_value == 0) { | 3112 | 0 | append_sign_manually = true; | 3113 | 0 | } | 3114 | | | 3115 | 8 | char local[N]; | 3116 | 8 | char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local)); | 3117 | 8 | const Int32 integer_str_len = N - (p - local); | 3118 | 8 | const Int32 frac_str_len = decimal_places; | 3119 | 8 | const Int32 whole_decimal_str_len = (append_sign_manually ? 
1 : 0) + integer_str_len + | 3120 | 8 | (decimal_places > 0 ? 1 : 0) + frac_str_len; | 3121 | | | 3122 | 8 | StringRef result = context->create_temp_string_val(whole_decimal_str_len); | 3123 | | // Modify a string passed via stringref | 3124 | 8 | char* result_data = const_cast<char*>(result.data); | 3125 | | | 3126 | 8 | if (append_sign_manually) { | 3127 | 0 | memset(result_data, '-', 1); | 3128 | 0 | } | 3129 | | | 3130 | 8 | memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len); | 3131 | 8 | if (decimal_places > 0) { | 3132 | 6 | *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.'; | 3133 | 6 | } | 3134 | | | 3135 | | // Convert fractional part to string with proper padding | 3136 | 8 | T remaining_frac = std::abs(static_cast<int>(frac_value)); | 3137 | 47 | for (int i = 0; i <= decimal_places - 1; ++i) { | 3138 | 39 | *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10); | 3139 | 39 | remaining_frac /= 10; | 3140 | 39 | } | 3141 | 8 | return result; | 3142 | 8 | } |
_ZN5doris11FormatRound15do_format_roundInLm112EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i Line | Count | Source | 3080 | 9 | Int32 decimal_places) { | 3081 | 9 | static_assert(std::is_integral<T>::value); | 3082 | 9 | const bool is_negative = int_value < 0 || frac_value < 0; | 3083 | | | 3084 | | // do round to frac_part based on decimal_places | 3085 | 9 | if (scale > decimal_places && decimal_places > 0) { | 3086 | 0 | DCHECK(scale <= 38); | 3087 | | // do rounding, so we need to reserve decimal_places + 1 digits | 3088 | 0 | auto multiplier = | 3089 | 0 | common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1)))); | 3090 | | // do divide first to avoid overflow | 3091 | | // after round frac_value will be positive by design | 3092 | 0 | frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5; | 3093 | 0 | frac_value /= 10; | 3094 | 9 | } else if (scale < decimal_places && decimal_places > 0) { | 3095 | | // since scale <= decimal_places, overflow is impossible | 3096 | 7 | frac_value = frac_value * common::exp10_i32(decimal_places - scale); | 3097 | 7 | } | 3098 | | | 3099 | | // Calculate power of 10 for decimal_places | 3100 | 9 | T decimal_power = common::exp10_i32(decimal_places); | 3101 | 9 | if (frac_value == decimal_power) { | 3102 | 0 | if (is_negative) { | 3103 | 0 | int_value -= 1; | 3104 | 0 | } else { | 3105 | 0 | int_value += 1; | 3106 | 0 | } | 3107 | 0 | frac_value = 0; | 3108 | 0 | } | 3109 | | | 3110 | 9 | bool append_sign_manually = false; | 3111 | 9 | if (is_negative && int_value == 0) { | 3112 | 0 | append_sign_manually = true; | 3113 | 0 | } | 3114 | | | 3115 | 9 | char local[N]; | 3116 | 9 | char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local)); | 3117 | 9 | const Int32 integer_str_len = N - (p - local); | 3118 | 9 | const Int32 frac_str_len = decimal_places; | 3119 | 9 | const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len + | 3120 | 9 | (decimal_places > 0 ? 
1 : 0) + frac_str_len; | 3121 | | | 3122 | 9 | StringRef result = context->create_temp_string_val(whole_decimal_str_len); | 3123 | | // Modify a string passed via stringref | 3124 | 9 | char* result_data = const_cast<char*>(result.data); | 3125 | | | 3126 | 9 | if (append_sign_manually) { | 3127 | 0 | memset(result_data, '-', 1); | 3128 | 0 | } | 3129 | | | 3130 | 9 | memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len); | 3131 | 9 | if (decimal_places > 0) { | 3132 | 7 | *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.'; | 3133 | 7 | } | 3134 | | | 3135 | | // Convert fractional part to string with proper padding | 3136 | 9 | T remaining_frac = std::abs(static_cast<int>(frac_value)); | 3137 | 50 | for (int i = 0; i <= decimal_places - 1; ++i) { | 3138 | 41 | *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10); | 3139 | 41 | remaining_frac /= 10; | 3140 | 41 | } | 3141 | 9 | return result; | 3142 | 9 | } |
_ZN5doris11FormatRound15do_format_roundInLm80EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i Line | Count | Source | 3080 | 3 | Int32 decimal_places) { | 3081 | 3 | static_assert(std::is_integral<T>::value); | 3082 | 3 | const bool is_negative = int_value < 0 || frac_value < 0; | 3083 | | | 3084 | | // do round to frac_part based on decimal_places | 3085 | 3 | if (scale > decimal_places && decimal_places > 0) { | 3086 | 3 | DCHECK(scale <= 38); | 3087 | | // do rounding, so we need to reserve decimal_places + 1 digits | 3088 | 3 | auto multiplier = | 3089 | 3 | common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1)))); | 3090 | | // do divide first to avoid overflow | 3091 | | // after round frac_value will be positive by design | 3092 | 3 | frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5; | 3093 | 3 | frac_value /= 10; | 3094 | 3 | } else if (scale < decimal_places && decimal_places > 0) { | 3095 | | // since scale <= decimal_places, overflow is impossible | 3096 | 0 | frac_value = frac_value * common::exp10_i32(decimal_places - scale); | 3097 | 0 | } | 3098 | | | 3099 | | // Calculate power of 10 for decimal_places | 3100 | 3 | T decimal_power = common::exp10_i32(decimal_places); | 3101 | 3 | if (frac_value == decimal_power) { | 3102 | 0 | if (is_negative) { | 3103 | 0 | int_value -= 1; | 3104 | 0 | } else { | 3105 | 0 | int_value += 1; | 3106 | 0 | } | 3107 | 0 | frac_value = 0; | 3108 | 0 | } | 3109 | | | 3110 | 3 | bool append_sign_manually = false; | 3111 | 3 | if (is_negative && int_value == 0) { | 3112 | 0 | append_sign_manually = true; | 3113 | 0 | } | 3114 | | | 3115 | 3 | char local[N]; | 3116 | 3 | char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local)); | 3117 | 3 | const Int32 integer_str_len = N - (p - local); | 3118 | 3 | const Int32 frac_str_len = decimal_places; | 3119 | 3 | const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len + | 3120 | 3 | (decimal_places > 0 ? 
1 : 0) + frac_str_len; | 3121 | | | 3122 | 3 | StringRef result = context->create_temp_string_val(whole_decimal_str_len); | 3123 | | // Modify a string passed via stringref | 3124 | 3 | char* result_data = const_cast<char*>(result.data); | 3125 | | | 3126 | 3 | if (append_sign_manually) { | 3127 | 0 | memset(result_data, '-', 1); | 3128 | 0 | } | 3129 | | | 3130 | 3 | memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len); | 3131 | 3 | if (decimal_places > 0) { | 3132 | 3 | *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.'; | 3133 | 3 | } | 3134 | | | 3135 | | // Convert fractional part to string with proper padding | 3136 | 3 | T remaining_frac = std::abs(static_cast<int>(frac_value)); | 3137 | 9 | for (int i = 0; i <= decimal_places - 1; ++i) { | 3138 | 6 | *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10); | 3139 | 6 | remaining_frac /= 10; | 3140 | 6 | } | 3141 | 3 | return result; | 3142 | 3 | } |
Unexecuted instantiation: _ZN5doris11FormatRound15do_format_roundIlLm32EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i _ZN5doris11FormatRound15do_format_roundIlLm56EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i Line | Count | Source | 3080 | 11 | Int32 decimal_places) { | 3081 | 11 | static_assert(std::is_integral<T>::value); | 3082 | 11 | const bool is_negative = int_value < 0 || frac_value < 0; | 3083 | | | 3084 | | // do round to frac_part based on decimal_places | 3085 | 11 | if (scale > decimal_places && decimal_places > 0) { | 3086 | 11 | DCHECK(scale <= 38); | 3087 | | // do rounding, so we need to reserve decimal_places + 1 digits | 3088 | 11 | auto multiplier = | 3089 | 11 | common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1)))); | 3090 | | // do divide first to avoid overflow | 3091 | | // after round frac_value will be positive by design | 3092 | 11 | frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5; | 3093 | 11 | frac_value /= 10; | 3094 | 11 | } else if (scale < decimal_places && decimal_places > 0) { | 3095 | | // since scale <= decimal_places, overflow is impossible | 3096 | 0 | frac_value = frac_value * common::exp10_i32(decimal_places - scale); | 3097 | 0 | } | 3098 | | | 3099 | | // Calculate power of 10 for decimal_places | 3100 | 11 | T decimal_power = common::exp10_i32(decimal_places); | 3101 | 11 | if (frac_value == decimal_power) { | 3102 | 0 | if (is_negative) { | 3103 | 0 | int_value -= 1; | 3104 | 0 | } else { | 3105 | 0 | int_value += 1; | 3106 | 0 | } | 3107 | 0 | frac_value = 0; | 3108 | 0 | } | 3109 | | | 3110 | 11 | bool append_sign_manually = false; | 3111 | 11 | if (is_negative && int_value == 0) { | 3112 | 0 | append_sign_manually = true; | 3113 | 0 | } | 3114 | | | 3115 | 11 | char local[N]; | 3116 | 11 | char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local)); | 3117 | 11 | const Int32 integer_str_len = N - (p - local); | 3118 | 11 | const Int32 frac_str_len = decimal_places; | 
3119 | 11 | const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len + | 3120 | 11 | (decimal_places > 0 ? 1 : 0) + frac_str_len; | 3121 | | | 3122 | 11 | StringRef result = context->create_temp_string_val(whole_decimal_str_len); | 3123 | | // Modify a string passed via stringref | 3124 | 11 | char* result_data = const_cast<char*>(result.data); | 3125 | | | 3126 | 11 | if (append_sign_manually) { | 3127 | 0 | memset(result_data, '-', 1); | 3128 | 0 | } | 3129 | | | 3130 | 11 | memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len); | 3131 | 11 | if (decimal_places > 0) { | 3132 | 11 | *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.'; | 3133 | 11 | } | 3134 | | | 3135 | | // Convert fractional part to string with proper padding | 3136 | 11 | T remaining_frac = std::abs(static_cast<int>(frac_value)); | 3137 | 33 | for (int i = 0; i <= decimal_places - 1; ++i) { | 3138 | 22 | *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10); | 3139 | 22 | remaining_frac /= 10; | 3140 | 22 | } | 3141 | 11 | return result; | 3142 | 11 | } |
|
3143 | | |
3144 | | } // namespace FormatRound |
3145 | | |
3146 | | struct MoneyFormatDoubleImpl { |
3147 | 7 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeFloat64>()}; } |
3148 | | |
3149 | | static void execute(FunctionContext* context, ColumnString* result_column, |
3150 | 1 | const ColumnPtr col_ptr, size_t input_rows_count) { |
3151 | 1 | const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get()); |
3152 | | // when scale is above 38, we will go here |
3153 | 5 | for (size_t i = 0; i < input_rows_count; i++) { |
3154 | | // round to 2 decimal places |
3155 | 4 | double value = |
3156 | 4 | MathFunctions::my_double_round(data_column->get_element(i), 2, false, false); |
3157 | 4 | StringRef str = MoneyFormat::do_money_format(context, fmt::format("{:.2f}", value)); |
3158 | 4 | result_column->insert_data(str.data, str.size); |
3159 | 4 | } |
3160 | 1 | } |
3161 | | }; |
3162 | | |
3163 | | struct MoneyFormatInt64Impl { |
3164 | 7 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt64>()}; } |
3165 | | |
3166 | | static void execute(FunctionContext* context, ColumnString* result_column, |
3167 | 1 | const ColumnPtr col_ptr, size_t input_rows_count) { |
3168 | 1 | const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get()); |
3169 | 4 | for (size_t i = 0; i < input_rows_count; i++) { |
3170 | 3 | Int64 value = data_column->get_element(i); |
3171 | 3 | StringRef str = |
3172 | 3 | MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_INT64()>( |
3173 | 3 | context, 0, value, 0); |
3174 | 3 | result_column->insert_data(str.data, str.size); |
3175 | 3 | } |
3176 | 1 | } |
3177 | | }; |
3178 | | |
3179 | | struct MoneyFormatInt128Impl { |
3180 | 7 | static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt128>()}; } |
3181 | | |
3182 | | static void execute(FunctionContext* context, ColumnString* result_column, |
3183 | 1 | const ColumnPtr col_ptr, size_t input_rows_count) { |
3184 | 1 | const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get()); |
3185 | | // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will |
3186 | | // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris, |
3187 | | // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124 |
3188 | 4 | for (size_t i = 0; i < input_rows_count; i++) { |
3189 | 3 | Int128 value = data_column->get_element(i); |
3190 | 3 | StringRef str = |
3191 | 3 | MoneyFormat::do_money_format<Int128, MoneyFormat::MAX_FORMAT_LEN_INT128()>( |
3192 | 3 | context, 0, value, 0); |
3193 | 3 | result_column->insert_data(str.data, str.size); |
3194 | 3 | } |
3195 | 1 | } |
3196 | | }; |
3197 | | |
3198 | | template <PrimitiveType Type> |
3199 | | struct MoneyFormatDecimalImpl { |
3200 | 35 | static DataTypes get_variadic_argument_types() { |
3201 | 35 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()}; |
3202 | 35 | } _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EE27get_variadic_argument_typesEv Line | Count | Source | 3200 | 7 | static DataTypes get_variadic_argument_types() { | 3201 | 7 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()}; | 3202 | 7 | } |
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EE27get_variadic_argument_typesEv Line | Count | Source | 3200 | 7 | static DataTypes get_variadic_argument_types() { | 3201 | 7 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()}; | 3202 | 7 | } |
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EE27get_variadic_argument_typesEv Line | Count | Source | 3200 | 7 | static DataTypes get_variadic_argument_types() { | 3201 | 7 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()}; | 3202 | 7 | } |
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EE27get_variadic_argument_typesEv Line | Count | Source | 3200 | 7 | static DataTypes get_variadic_argument_types() { | 3201 | 7 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()}; | 3202 | 7 | } |
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EE27get_variadic_argument_typesEv Line | Count | Source | 3200 | 7 | static DataTypes get_variadic_argument_types() { | 3201 | 7 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()}; | 3202 | 7 | } |
|
3203 | | |
3204 | | static void execute(FunctionContext* context, ColumnString* result_column, ColumnPtr col_ptr, |
3205 | 5 | size_t input_rows_count) { |
3206 | 5 | if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) { |
3207 | 16 | for (size_t i = 0; i < input_rows_count; i++) { |
3208 | 14 | const auto& value = decimalv2_column->get_element(i); |
3209 | | // unified_frac_value has 3 digits |
3210 | 14 | auto unified_frac_value = value.frac_value() / 1000000; |
3211 | 14 | StringRef str = |
3212 | 14 | MoneyFormat::do_money_format<Int128, |
3213 | 14 | MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>( |
3214 | 14 | context, 3, value.int_value(), unified_frac_value); |
3215 | | |
3216 | 14 | result_column->insert_data(str.data, str.size); |
3217 | 14 | } |
3218 | 3 | } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) { |
3219 | 0 | const UInt32 scale = decimal32_column->get_scale(); |
3220 | 0 | for (size_t i = 0; i < input_rows_count; i++) { |
3221 | 0 | const Int32& frac_part = decimal32_column->get_fractional_part(i); |
3222 | 0 | const Int32& whole_part = decimal32_column->get_intergral_part(i); |
3223 | 0 | StringRef str = |
3224 | 0 | MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>( |
3225 | 0 | context, scale, static_cast<Int64>(whole_part), |
3226 | 0 | static_cast<Int64>(frac_part)); |
3227 | |
|
3228 | 0 | result_column->insert_data(str.data, str.size); |
3229 | 0 | } |
3230 | 3 | } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) { |
3231 | 3 | const UInt32 scale = decimal64_column->get_scale(); |
3232 | 8 | for (size_t i = 0; i < input_rows_count; i++) { |
3233 | 5 | const Int64& frac_part = decimal64_column->get_fractional_part(i); |
3234 | 5 | const Int64& whole_part = decimal64_column->get_intergral_part(i); |
3235 | | |
3236 | 5 | StringRef str = |
3237 | 5 | MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>( |
3238 | 5 | context, scale, whole_part, frac_part); |
3239 | | |
3240 | 5 | result_column->insert_data(str.data, str.size); |
3241 | 5 | } |
3242 | 3 | } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) { |
3243 | 0 | const UInt32 scale = decimal128_column->get_scale(); |
3244 | 0 | for (size_t i = 0; i < input_rows_count; i++) { |
3245 | 0 | const Int128& frac_part = decimal128_column->get_fractional_part(i); |
3246 | 0 | const Int128& whole_part = decimal128_column->get_intergral_part(i); |
3247 | |
|
3248 | 0 | StringRef str = |
3249 | 0 | MoneyFormat::do_money_format<Int128, |
3250 | 0 | MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>( |
3251 | 0 | context, scale, whole_part, frac_part); |
3252 | |
|
3253 | 0 | result_column->insert_data(str.data, str.size); |
3254 | 0 | } |
3255 | 0 | } else { |
3256 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, |
3257 | 0 | "Not supported input argument type {}", col_ptr->get_name()); |
3258 | 0 | } |
3259 | | // TODO: decimal256 |
3260 | | /* else if (auto* decimal256_column = |
3261 | | check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) { |
3262 | | const UInt32 scale = decimal256_column->get_scale(); |
3263 | | const auto multiplier = |
3264 | | scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale); |
3265 | | for (size_t i = 0; i < input_rows_count; i++) { |
3266 | | Decimal256 frac_part = decimal256_column->get_fractional_part(i); |
3267 | | if (scale > 2) { |
3268 | | int delta = ((frac_part % multiplier) << 1) > multiplier; |
3269 | | frac_part = Decimal256(frac_part / multiplier + delta); |
3270 | | } else if (scale < 2) { |
3271 | | frac_part = Decimal256(frac_part * multiplier); |
3272 | | } |
3273 | | |
3274 | | StringRef str = MoneyFormat::do_money_format<int64_t, 26>( |
3275 | | context, decimal256_column->get_intergral_part(i), frac_part); |
3276 | | |
3277 | | result_column->insert_data(str.data, str.size); |
3278 | | } |
3279 | | }*/ |
3280 | 5 | } _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm Line | Count | Source | 3205 | 2 | size_t input_rows_count) { | 3206 | 2 | if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) { | 3207 | 16 | for (size_t i = 0; i < input_rows_count; i++) { | 3208 | 14 | const auto& value = decimalv2_column->get_element(i); | 3209 | | // unified_frac_value has 3 digits | 3210 | 14 | auto unified_frac_value = value.frac_value() / 1000000; | 3211 | 14 | StringRef str = | 3212 | 14 | MoneyFormat::do_money_format<Int128, | 3213 | 14 | MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>( | 3214 | 14 | context, 3, value.int_value(), unified_frac_value); | 3215 | | | 3216 | 14 | result_column->insert_data(str.data, str.size); | 3217 | 14 | } | 3218 | 2 | } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) { | 3219 | 0 | const UInt32 scale = decimal32_column->get_scale(); | 3220 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3221 | 0 | const Int32& frac_part = decimal32_column->get_fractional_part(i); | 3222 | 0 | const Int32& whole_part = decimal32_column->get_intergral_part(i); | 3223 | 0 | StringRef str = | 3224 | 0 | MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>( | 3225 | 0 | context, scale, static_cast<Int64>(whole_part), | 3226 | 0 | static_cast<Int64>(frac_part)); | 3227 | |
| 3228 | 0 | result_column->insert_data(str.data, str.size); | 3229 | 0 | } | 3230 | 0 | } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) { | 3231 | 0 | const UInt32 scale = decimal64_column->get_scale(); | 3232 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3233 | 0 | const Int64& frac_part = decimal64_column->get_fractional_part(i); | 3234 | 0 | const Int64& whole_part = decimal64_column->get_intergral_part(i); | 3235 | |
| 3236 | 0 | StringRef str = | 3237 | 0 | MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>( | 3238 | 0 | context, scale, whole_part, frac_part); | 3239 | |
| 3240 | 0 | result_column->insert_data(str.data, str.size); | 3241 | 0 | } | 3242 | 0 | } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) { | 3243 | 0 | const UInt32 scale = decimal128_column->get_scale(); | 3244 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3245 | 0 | const Int128& frac_part = decimal128_column->get_fractional_part(i); | 3246 | 0 | const Int128& whole_part = decimal128_column->get_intergral_part(i); | 3247 | |
| 3248 | 0 | StringRef str = | 3249 | 0 | MoneyFormat::do_money_format<Int128, | 3250 | 0 | MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>( | 3251 | 0 | context, scale, whole_part, frac_part); | 3252 | |
| 3253 | 0 | result_column->insert_data(str.data, str.size); | 3254 | 0 | } | 3255 | 0 | } else { | 3256 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 3257 | 0 | "Not supported input argument type {}", col_ptr->get_name()); | 3258 | 0 | } | 3259 | | // TODO: decimal256 | 3260 | | /* else if (auto* decimal256_column = | 3261 | | check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) { | 3262 | | const UInt32 scale = decimal256_column->get_scale(); | 3263 | | const auto multiplier = | 3264 | | scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale); | 3265 | | for (size_t i = 0; i < input_rows_count; i++) { | 3266 | | Decimal256 frac_part = decimal256_column->get_fractional_part(i); | 3267 | | if (scale > 2) { | 3268 | | int delta = ((frac_part % multiplier) << 1) > multiplier; | 3269 | | frac_part = Decimal256(frac_part / multiplier + delta); | 3270 | | } else if (scale < 2) { | 3271 | | frac_part = Decimal256(frac_part * multiplier); | 3272 | | } | 3273 | | | 3274 | | StringRef str = MoneyFormat::do_money_format<int64_t, 26>( | 3275 | | context, decimal256_column->get_intergral_part(i), frac_part); | 3276 | | | 3277 | | result_column->insert_data(str.data, str.size); | 3278 | | } | 3279 | | }*/ | 3280 | 2 | } |
Unexecuted instantiation: _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm Line | Count | Source | 3205 | 3 | size_t input_rows_count) { | 3206 | 3 | if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) { | 3207 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3208 | 0 | const auto& value = decimalv2_column->get_element(i); | 3209 | | // unified_frac_value has 3 digits | 3210 | 0 | auto unified_frac_value = value.frac_value() / 1000000; | 3211 | 0 | StringRef str = | 3212 | 0 | MoneyFormat::do_money_format<Int128, | 3213 | 0 | MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>( | 3214 | 0 | context, 3, value.int_value(), unified_frac_value); | 3215 | |
| 3216 | 0 | result_column->insert_data(str.data, str.size); | 3217 | 0 | } | 3218 | 3 | } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) { | 3219 | 0 | const UInt32 scale = decimal32_column->get_scale(); | 3220 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3221 | 0 | const Int32& frac_part = decimal32_column->get_fractional_part(i); | 3222 | 0 | const Int32& whole_part = decimal32_column->get_intergral_part(i); | 3223 | 0 | StringRef str = | 3224 | 0 | MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>( | 3225 | 0 | context, scale, static_cast<Int64>(whole_part), | 3226 | 0 | static_cast<Int64>(frac_part)); | 3227 | |
| 3228 | 0 | result_column->insert_data(str.data, str.size); | 3229 | 0 | } | 3230 | 3 | } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) { | 3231 | 3 | const UInt32 scale = decimal64_column->get_scale(); | 3232 | 8 | for (size_t i = 0; i < input_rows_count; i++) { | 3233 | 5 | const Int64& frac_part = decimal64_column->get_fractional_part(i); | 3234 | 5 | const Int64& whole_part = decimal64_column->get_intergral_part(i); | 3235 | | | 3236 | 5 | StringRef str = | 3237 | 5 | MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>( | 3238 | 5 | context, scale, whole_part, frac_part); | 3239 | | | 3240 | 5 | result_column->insert_data(str.data, str.size); | 3241 | 5 | } | 3242 | 3 | } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) { | 3243 | 0 | const UInt32 scale = decimal128_column->get_scale(); | 3244 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3245 | 0 | const Int128& frac_part = decimal128_column->get_fractional_part(i); | 3246 | 0 | const Int128& whole_part = decimal128_column->get_intergral_part(i); | 3247 | |
| 3248 | 0 | StringRef str = | 3249 | 0 | MoneyFormat::do_money_format<Int128, | 3250 | 0 | MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>( | 3251 | 0 | context, scale, whole_part, frac_part); | 3252 | |
| 3253 | 0 | result_column->insert_data(str.data, str.size); | 3254 | 0 | } | 3255 | 0 | } else { | 3256 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, | 3257 | 0 | "Not supported input argument type {}", col_ptr->get_name()); | 3258 | 0 | } | 3259 | | // TODO: decimal256 | 3260 | | /* else if (auto* decimal256_column = | 3261 | | check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) { | 3262 | | const UInt32 scale = decimal256_column->get_scale(); | 3263 | | const auto multiplier = | 3264 | | scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale); | 3265 | | for (size_t i = 0; i < input_rows_count; i++) { | 3266 | | Decimal256 frac_part = decimal256_column->get_fractional_part(i); | 3267 | | if (scale > 2) { | 3268 | | int delta = ((frac_part % multiplier) << 1) > multiplier; | 3269 | | frac_part = Decimal256(frac_part / multiplier + delta); | 3270 | | } else if (scale < 2) { | 3271 | | frac_part = Decimal256(frac_part * multiplier); | 3272 | | } | 3273 | | | 3274 | | StringRef str = MoneyFormat::do_money_format<int64_t, 26>( | 3275 | | context, decimal256_column->get_intergral_part(i), frac_part); | 3276 | | | 3277 | | result_column->insert_data(str.data, str.size); | 3278 | | } | 3279 | | }*/ | 3280 | 3 | } |
Unexecuted instantiation: _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm Unexecuted instantiation: _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm |
3281 | | }; |
3282 | | |
3283 | | struct FormatRoundDoubleImpl { |
3284 | 7 | static DataTypes get_variadic_argument_types() { |
3285 | 7 | return {std::make_shared<DataTypeFloat64>(), std::make_shared<DataTypeInt32>()}; |
3286 | 7 | } |
3287 | | |
3288 | 4 | static std::string add_thousands_separator(const std::string& formatted_num) { |
3289 | | // Find the position of the decimal point |
3290 | 4 | size_t dot_pos = formatted_num.find('.'); |
3291 | 4 | if (dot_pos == std::string::npos) { |
3292 | 0 | dot_pos = formatted_num.size(); |
3293 | 0 | } |
3294 | | |
3295 | | // Handle the integer part |
3296 | 4 | int start = (formatted_num[0] == '-') ? 1 : 0; |
3297 | 4 | int digit_count = dot_pos - start; |
3298 | | |
3299 | | // There is no need to add commas. |
3300 | 4 | if (digit_count <= 3) { |
3301 | 2 | return formatted_num; |
3302 | 2 | } |
3303 | | |
3304 | 2 | std::string result; |
3305 | | |
3306 | 2 | if (start == 1) result += '-'; |
3307 | | |
3308 | | // Add the integer part (with comma) |
3309 | 2 | int first_group = digit_count % 3; |
3310 | 2 | if (first_group == 0) first_group = 3; |
3311 | 2 | result.append(formatted_num, start, first_group); |
3312 | | |
3313 | 6 | for (size_t i = start + first_group; i < dot_pos; i += 3) { |
3314 | 4 | result += ','; |
3315 | 4 | result.append(formatted_num, i, 3); |
3316 | 4 | } |
3317 | | |
3318 | | // Add the decimal part (keep as it is) |
3319 | 2 | if (dot_pos != formatted_num.size()) { |
3320 | 2 | result.append(formatted_num, dot_pos); |
3321 | 2 | } |
3322 | | |
3323 | 2 | return result; |
3324 | 4 | } |
3325 | | |
3326 | | template <bool is_const> |
3327 | | static Status execute(FunctionContext* context, ColumnString* result_column, |
3328 | | const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr, |
3329 | 1 | size_t input_rows_count) { |
3330 | 1 | const auto& arg_column_data_2 = |
3331 | 1 | assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data(); |
3332 | 1 | const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get()); |
3333 | | // when scale is above 38, we will go here |
3334 | 5 | for (size_t i = 0; i < input_rows_count; i++) { |
3335 | 4 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; |
3336 | 4 | if (decimal_places < 0 || decimal_places > 1024) { |
3337 | 0 | return Status::InvalidArgument( |
3338 | 0 | "The second argument is {}, it should be in range [0, 1024].", |
3339 | 0 | decimal_places); |
3340 | 0 | } |
3341 | | // round to `decimal_places` decimal places |
3342 | 4 | double value = MathFunctions::my_double_round(data_column->get_element(i), |
3343 | 4 | decimal_places, false, false); |
3344 | 4 | std::string formatted_value = fmt::format("{:.{}f}", value, decimal_places); |
3345 | 4 | if (std::isfinite(value)) { |
3346 | 4 | result_column->insert_value(add_thousands_separator(formatted_value)); |
3347 | 4 | } else { |
3348 | | // if value is not finite, we just insert the original formatted value |
3349 | | // e.g. "inf", "-inf", "nan" |
3350 | 0 | result_column->insert_value(formatted_value); |
3351 | 0 | } |
3352 | 4 | } |
3353 | 1 | return Status::OK(); |
3354 | 1 | } Unexecuted instantiation: _ZN5doris21FormatRoundDoubleImpl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m _ZN5doris21FormatRoundDoubleImpl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m Line | Count | Source | 3329 | 1 | size_t input_rows_count) { | 3330 | 1 | const auto& arg_column_data_2 = | 3331 | 1 | assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data(); | 3332 | 1 | const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get()); | 3333 | | // when scale is above 38, we will go here | 3334 | 5 | for (size_t i = 0; i < input_rows_count; i++) { | 3335 | 4 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3336 | 4 | if (decimal_places < 0 || decimal_places > 1024) { | 3337 | 0 | return Status::InvalidArgument( | 3338 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3339 | 0 | decimal_places); | 3340 | 0 | } | 3341 | | // round to `decimal_places` decimal places | 3342 | 4 | double value = MathFunctions::my_double_round(data_column->get_element(i), | 3343 | 4 | decimal_places, false, false); | 3344 | 4 | std::string formatted_value = fmt::format("{:.{}f}", value, decimal_places); | 3345 | 4 | if (std::isfinite(value)) { | 3346 | 4 | result_column->insert_value(add_thousands_separator(formatted_value)); | 3347 | 4 | } else { | 3348 | | // if value is not finite, we just insert the original formatted value | 3349 | | // e.g. "inf", "-inf", "nan" | 3350 | 0 | result_column->insert_value(formatted_value); | 3351 | 0 | } | 3352 | 4 | } | 3353 | 1 | return Status::OK(); | 3354 | 1 | } |
|
3355 | | }; |
3356 | | |
3357 | | struct FormatRoundInt64Impl { |
3358 | 7 | static DataTypes get_variadic_argument_types() { |
3359 | 7 | return {std::make_shared<DataTypeInt64>(), std::make_shared<DataTypeInt32>()}; |
3360 | 7 | } |
3361 | | |
3362 | | template <bool is_const> |
3363 | | static Status execute(FunctionContext* context, ColumnString* result_column, |
3364 | | const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr, |
3365 | 1 | size_t input_rows_count) { |
3366 | 1 | const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get()); |
3367 | 1 | const auto& arg_column_data_2 = |
3368 | 1 | assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data(); |
3369 | 9 | for (size_t i = 0; i < input_rows_count; i++) { |
3370 | 8 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; |
3371 | 8 | if (decimal_places < 0 || decimal_places > 1024) { |
3372 | 0 | return Status::InvalidArgument( |
3373 | 0 | "The second argument is {}, it should be in range [0, 1024].", |
3374 | 0 | decimal_places); |
3375 | 0 | } |
3376 | 8 | Int64 value = data_column->get_element(i); |
3377 | 8 | StringRef str = |
3378 | 8 | FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_INT64()>( |
3379 | 8 | context, 0, value, 0, decimal_places); |
3380 | 8 | result_column->insert_data(str.data, str.size); |
3381 | 8 | } |
3382 | 1 | return Status::OK(); |
3383 | 1 | } Unexecuted instantiation: _ZN5doris20FormatRoundInt64Impl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m _ZN5doris20FormatRoundInt64Impl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m Line | Count | Source | 3365 | 1 | size_t input_rows_count) { | 3366 | 1 | const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get()); | 3367 | 1 | const auto& arg_column_data_2 = | 3368 | 1 | assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data(); | 3369 | 9 | for (size_t i = 0; i < input_rows_count; i++) { | 3370 | 8 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3371 | 8 | if (decimal_places < 0 || decimal_places > 1024) { | 3372 | 0 | return Status::InvalidArgument( | 3373 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3374 | 0 | decimal_places); | 3375 | 0 | } | 3376 | 8 | Int64 value = data_column->get_element(i); | 3377 | 8 | StringRef str = | 3378 | 8 | FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_INT64()>( | 3379 | 8 | context, 0, value, 0, decimal_places); | 3380 | 8 | result_column->insert_data(str.data, str.size); | 3381 | 8 | } | 3382 | 1 | return Status::OK(); | 3383 | 1 | } |
|
3384 | | }; |
3385 | | |
3386 | | struct FormatRoundInt128Impl { |
3387 | 7 | static DataTypes get_variadic_argument_types() { |
3388 | 7 | return {std::make_shared<DataTypeInt128>(), std::make_shared<DataTypeInt32>()}; |
3389 | 7 | } |
3390 | | |
3391 | | template <bool is_const> |
3392 | | static Status execute(FunctionContext* context, ColumnString* result_column, |
3393 | | const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr, |
3394 | 1 | size_t input_rows_count) { |
3395 | 1 | const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get()); |
3396 | 1 | const auto& arg_column_data_2 = |
3397 | 1 | assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data(); |
3398 | | // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will |
3399 | | // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris, |
3400 | | // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124 |
3401 | 10 | for (size_t i = 0; i < input_rows_count; i++) { |
3402 | 9 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; |
3403 | 9 | if (decimal_places < 0 || decimal_places > 1024) { |
3404 | 0 | return Status::InvalidArgument( |
3405 | 0 | "The second argument is {}, it should be in range [0, 1024].", |
3406 | 0 | decimal_places); |
3407 | 0 | } |
3408 | 9 | Int128 value = data_column->get_element(i); |
3409 | 9 | StringRef str = |
3410 | 9 | FormatRound::do_format_round<Int128, FormatRound::MAX_FORMAT_LEN_INT128()>( |
3411 | 9 | context, 0, value, 0, decimal_places); |
3412 | 9 | result_column->insert_data(str.data, str.size); |
3413 | 9 | } |
3414 | 1 | return Status::OK(); |
3415 | 1 | } Unexecuted instantiation: _ZN5doris21FormatRoundInt128Impl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m _ZN5doris21FormatRoundInt128Impl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m Line | Count | Source | 3394 | 1 | size_t input_rows_count) { | 3395 | 1 | const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get()); | 3396 | 1 | const auto& arg_column_data_2 = | 3397 | 1 | assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data(); | 3398 | | // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will | 3399 | | // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris, | 3400 | | // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124 | 3401 | 10 | for (size_t i = 0; i < input_rows_count; i++) { | 3402 | 9 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3403 | 9 | if (decimal_places < 0 || decimal_places > 1024) { | 3404 | 0 | return Status::InvalidArgument( | 3405 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3406 | 0 | decimal_places); | 3407 | 0 | } | 3408 | 9 | Int128 value = data_column->get_element(i); | 3409 | 9 | StringRef str = | 3410 | 9 | FormatRound::do_format_round<Int128, FormatRound::MAX_FORMAT_LEN_INT128()>( | 3411 | 9 | context, 0, value, 0, decimal_places); | 3412 | 9 | result_column->insert_data(str.data, str.size); | 3413 | 9 | } | 3414 | 1 | return Status::OK(); | 3415 | 1 | } |
|
3416 | | }; |
3417 | | |
3418 | | template <PrimitiveType Type> |
3419 | | struct FormatRoundDecimalImpl { |
3420 | 35 | static DataTypes get_variadic_argument_types() { |
3421 | 35 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(), |
3422 | 35 | std::make_shared<DataTypeInt32>()}; |
3423 | 35 | } _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE27get_variadic_argument_typesEv Line | Count | Source | 3420 | 7 | static DataTypes get_variadic_argument_types() { | 3421 | 7 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(), | 3422 | 7 | std::make_shared<DataTypeInt32>()}; | 3423 | 7 | } |
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE27get_variadic_argument_typesEv Line | Count | Source | 3420 | 7 | static DataTypes get_variadic_argument_types() { | 3421 | 7 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(), | 3422 | 7 | std::make_shared<DataTypeInt32>()}; | 3423 | 7 | } |
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE27get_variadic_argument_typesEv Line | Count | Source | 3420 | 7 | static DataTypes get_variadic_argument_types() { | 3421 | 7 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(), | 3422 | 7 | std::make_shared<DataTypeInt32>()}; | 3423 | 7 | } |
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE27get_variadic_argument_typesEv Line | Count | Source | 3420 | 7 | static DataTypes get_variadic_argument_types() { | 3421 | 7 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(), | 3422 | 7 | std::make_shared<DataTypeInt32>()}; | 3423 | 7 | } |
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE27get_variadic_argument_typesEv Line | Count | Source | 3420 | 7 | static DataTypes get_variadic_argument_types() { | 3421 | 7 | return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(), | 3422 | 7 | std::make_shared<DataTypeInt32>()}; | 3423 | 7 | } |
|
3424 | | |
3425 | | template <bool is_const> |
3426 | | static Status execute(FunctionContext* context, ColumnString* result_column, ColumnPtr col_ptr, |
3427 | 9 | ColumnPtr decimal_places_col_ptr, size_t input_rows_count) { |
3428 | 9 | const auto& arg_column_data_2 = |
3429 | 9 | assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data(); |
3430 | 9 | if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) { |
3431 | 4 | for (size_t i = 0; i < input_rows_count; i++) { |
3432 | 3 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; |
3433 | 3 | if (decimal_places < 0 || decimal_places > 1024) { |
3434 | 0 | return Status::InvalidArgument( |
3435 | 0 | "The second argument is {}, it should be in range [0, 1024].", |
3436 | 0 | decimal_places); |
3437 | 0 | } |
3438 | 3 | const auto& value = decimalv2_column->get_element(i); |
3439 | | // unified_frac_value has 3 digits |
3440 | 3 | auto unified_frac_value = value.frac_value() / 1000000; |
3441 | 3 | StringRef str = |
3442 | 3 | FormatRound::do_format_round<Int128, |
3443 | 3 | FormatRound::MAX_FORMAT_LEN_DEC128V2()>( |
3444 | 3 | context, 3, value.int_value(), unified_frac_value, decimal_places); |
3445 | | |
3446 | 3 | result_column->insert_data(str.data, str.size); |
3447 | 3 | } |
3448 | 8 | } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) { |
3449 | 0 | const UInt32 scale = decimal32_column->get_scale(); |
3450 | 0 | for (size_t i = 0; i < input_rows_count; i++) { |
3451 | 0 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; |
3452 | 0 | if (decimal_places < 0 || decimal_places > 1024) { |
3453 | 0 | return Status::InvalidArgument( |
3454 | 0 | "The second argument is {}, it should be in range [0, 1024].", |
3455 | 0 | decimal_places); |
3456 | 0 | } |
3457 | 0 | const Int32& frac_part = decimal32_column->get_fractional_part(i); |
3458 | 0 | const Int32& whole_part = decimal32_column->get_intergral_part(i); |
3459 | 0 | StringRef str = |
3460 | 0 | FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>( |
3461 | 0 | context, scale, static_cast<Int64>(whole_part), |
3462 | 0 | static_cast<Int64>(frac_part), decimal_places); |
3463 | |
|
3464 | 0 | result_column->insert_data(str.data, str.size); |
3465 | 0 | } |
3466 | 8 | } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) { |
3467 | 8 | const UInt32 scale = decimal64_column->get_scale(); |
3468 | 19 | for (size_t i = 0; i < input_rows_count; i++) { |
3469 | 11 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; |
3470 | 11 | if (decimal_places < 0 || decimal_places > 1024) { |
3471 | 0 | return Status::InvalidArgument( |
3472 | 0 | "The second argument is {}, it should be in range [0, 1024].", |
3473 | 0 | decimal_places); |
3474 | 0 | } |
3475 | 11 | const Int64& frac_part = decimal64_column->get_fractional_part(i); |
3476 | 11 | const Int64& whole_part = decimal64_column->get_intergral_part(i); |
3477 | | |
3478 | 11 | StringRef str = |
3479 | 11 | FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>( |
3480 | 11 | context, scale, whole_part, frac_part, decimal_places); |
3481 | | |
3482 | 11 | result_column->insert_data(str.data, str.size); |
3483 | 11 | } |
3484 | 8 | } else if (const auto* decimal128_column = |
3485 | 0 | check_and_get_column<ColumnDecimal128V3>(*col_ptr)) { |
3486 | 0 | const UInt32 scale = decimal128_column->get_scale(); |
3487 | 0 | for (size_t i = 0; i < input_rows_count; i++) { |
3488 | 0 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; |
3489 | 0 | if (decimal_places < 0 || decimal_places > 1024) { |
3490 | 0 | return Status::InvalidArgument( |
3491 | 0 | "The second argument is {}, it should be in range [0, 1024].", |
3492 | 0 | decimal_places); |
3493 | 0 | } |
3494 | 0 | const Int128& frac_part = decimal128_column->get_fractional_part(i); |
3495 | 0 | const Int128& whole_part = decimal128_column->get_intergral_part(i); |
3496 | |
|
3497 | 0 | StringRef str = |
3498 | 0 | FormatRound::do_format_round<Int128, |
3499 | 0 | FormatRound::MAX_FORMAT_LEN_DEC128V3()>( |
3500 | 0 | context, scale, whole_part, frac_part, decimal_places); |
3501 | |
|
3502 | 0 | result_column->insert_data(str.data, str.size); |
3503 | 0 | } |
3504 | 0 | } else { |
3505 | 0 | return Status::InternalError("Not supported input argument type {}", |
3506 | 0 | col_ptr->get_name()); |
3507 | 0 | } |
3508 | 9 | return Status::OK(); |
3509 | 9 | } Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m Line | Count | Source | 3427 | 1 | ColumnPtr decimal_places_col_ptr, size_t input_rows_count) { | 3428 | 1 | const auto& arg_column_data_2 = | 3429 | 1 | assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data(); | 3430 | 1 | if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) { | 3431 | 4 | for (size_t i = 0; i < input_rows_count; i++) { | 3432 | 3 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3433 | 3 | if (decimal_places < 0 || decimal_places > 1024) { | 3434 | 0 | return Status::InvalidArgument( | 3435 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3436 | 0 | decimal_places); | 3437 | 0 | } | 3438 | 3 | const auto& value = decimalv2_column->get_element(i); | 3439 | | // unified_frac_value has 3 digits | 3440 | 3 | auto unified_frac_value = value.frac_value() / 1000000; | 3441 | 3 | StringRef str = | 3442 | 3 | FormatRound::do_format_round<Int128, | 3443 | 3 | FormatRound::MAX_FORMAT_LEN_DEC128V2()>( | 3444 | 3 | context, 3, value.int_value(), unified_frac_value, decimal_places); | 3445 | | | 3446 | 3 | result_column->insert_data(str.data, str.size); | 3447 | 3 | } | 3448 | 1 | } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) { | 3449 | 0 | const UInt32 scale = decimal32_column->get_scale(); | 3450 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3451 | 0 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3452 | 0 | if (decimal_places < 0 || decimal_places > 1024) { | 3453 | 0 | return 
Status::InvalidArgument( | 3454 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3455 | 0 | decimal_places); | 3456 | 0 | } | 3457 | 0 | const Int32& frac_part = decimal32_column->get_fractional_part(i); | 3458 | 0 | const Int32& whole_part = decimal32_column->get_intergral_part(i); | 3459 | 0 | StringRef str = | 3460 | 0 | FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>( | 3461 | 0 | context, scale, static_cast<Int64>(whole_part), | 3462 | 0 | static_cast<Int64>(frac_part), decimal_places); | 3463 | |
| 3464 | 0 | result_column->insert_data(str.data, str.size); | 3465 | 0 | } | 3466 | 0 | } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) { | 3467 | 0 | const UInt32 scale = decimal64_column->get_scale(); | 3468 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3469 | 0 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3470 | 0 | if (decimal_places < 0 || decimal_places > 1024) { | 3471 | 0 | return Status::InvalidArgument( | 3472 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3473 | 0 | decimal_places); | 3474 | 0 | } | 3475 | 0 | const Int64& frac_part = decimal64_column->get_fractional_part(i); | 3476 | 0 | const Int64& whole_part = decimal64_column->get_intergral_part(i); | 3477 | |
| 3478 | 0 | StringRef str = | 3479 | 0 | FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>( | 3480 | 0 | context, scale, whole_part, frac_part, decimal_places); | 3481 | |
| 3482 | 0 | result_column->insert_data(str.data, str.size); | 3483 | 0 | } | 3484 | 0 | } else if (const auto* decimal128_column = | 3485 | 0 | check_and_get_column<ColumnDecimal128V3>(*col_ptr)) { | 3486 | 0 | const UInt32 scale = decimal128_column->get_scale(); | 3487 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3488 | 0 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3489 | 0 | if (decimal_places < 0 || decimal_places > 1024) { | 3490 | 0 | return Status::InvalidArgument( | 3491 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3492 | 0 | decimal_places); | 3493 | 0 | } | 3494 | 0 | const Int128& frac_part = decimal128_column->get_fractional_part(i); | 3495 | 0 | const Int128& whole_part = decimal128_column->get_intergral_part(i); | 3496 | |
| 3497 | 0 | StringRef str = | 3498 | 0 | FormatRound::do_format_round<Int128, | 3499 | 0 | FormatRound::MAX_FORMAT_LEN_DEC128V3()>( | 3500 | 0 | context, scale, whole_part, frac_part, decimal_places); | 3501 | |
| 3502 | 0 | result_column->insert_data(str.data, str.size); | 3503 | 0 | } | 3504 | 0 | } else { | 3505 | 0 | return Status::InternalError("Not supported input argument type {}", | 3506 | 0 | col_ptr->get_name()); | 3507 | 0 | } | 3508 | 1 | return Status::OK(); | 3509 | 1 | } |
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m Line | Count | Source | 3427 | 2 | ColumnPtr decimal_places_col_ptr, size_t input_rows_count) { | 3428 | 2 | const auto& arg_column_data_2 = | 3429 | 2 | assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data(); | 3430 | 2 | if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) { | 3431 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3432 | 0 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3433 | 0 | if (decimal_places < 0 || decimal_places > 1024) { | 3434 | 0 | return Status::InvalidArgument( | 3435 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3436 | 0 | decimal_places); | 3437 | 0 | } | 3438 | 0 | const auto& value = decimalv2_column->get_element(i); | 3439 | | // unified_frac_value has 3 digits | 3440 | 0 | auto unified_frac_value = value.frac_value() / 1000000; | 3441 | 0 | StringRef str = | 3442 | 0 | FormatRound::do_format_round<Int128, | 3443 | 0 | FormatRound::MAX_FORMAT_LEN_DEC128V2()>( | 3444 | 0 | context, 3, value.int_value(), unified_frac_value, decimal_places); | 3445 | |
| 3446 | 0 | result_column->insert_data(str.data, str.size); | 3447 | 0 | } | 3448 | 2 | } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) { | 3449 | 0 | const UInt32 scale = decimal32_column->get_scale(); | 3450 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3451 | 0 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3452 | 0 | if (decimal_places < 0 || decimal_places > 1024) { | 3453 | 0 | return Status::InvalidArgument( | 3454 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3455 | 0 | decimal_places); | 3456 | 0 | } | 3457 | 0 | const Int32& frac_part = decimal32_column->get_fractional_part(i); | 3458 | 0 | const Int32& whole_part = decimal32_column->get_intergral_part(i); | 3459 | 0 | StringRef str = | 3460 | 0 | FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>( | 3461 | 0 | context, scale, static_cast<Int64>(whole_part), | 3462 | 0 | static_cast<Int64>(frac_part), decimal_places); | 3463 | |
| 3464 | 0 | result_column->insert_data(str.data, str.size); | 3465 | 0 | } | 3466 | 2 | } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) { | 3467 | 2 | const UInt32 scale = decimal64_column->get_scale(); | 3468 | 4 | for (size_t i = 0; i < input_rows_count; i++) { | 3469 | 2 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3470 | 2 | if (decimal_places < 0 || decimal_places > 1024) { | 3471 | 0 | return Status::InvalidArgument( | 3472 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3473 | 0 | decimal_places); | 3474 | 0 | } | 3475 | 2 | const Int64& frac_part = decimal64_column->get_fractional_part(i); | 3476 | 2 | const Int64& whole_part = decimal64_column->get_intergral_part(i); | 3477 | | | 3478 | 2 | StringRef str = | 3479 | 2 | FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>( | 3480 | 2 | context, scale, whole_part, frac_part, decimal_places); | 3481 | | | 3482 | 2 | result_column->insert_data(str.data, str.size); | 3483 | 2 | } | 3484 | 2 | } else if (const auto* decimal128_column = | 3485 | 0 | check_and_get_column<ColumnDecimal128V3>(*col_ptr)) { | 3486 | 0 | const UInt32 scale = decimal128_column->get_scale(); | 3487 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3488 | 0 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3489 | 0 | if (decimal_places < 0 || decimal_places > 1024) { | 3490 | 0 | return Status::InvalidArgument( | 3491 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3492 | 0 | decimal_places); | 3493 | 0 | } | 3494 | 0 | const Int128& frac_part = decimal128_column->get_fractional_part(i); | 3495 | 0 | const Int128& whole_part = decimal128_column->get_intergral_part(i); | 3496 | |
| 3497 | 0 | StringRef str = | 3498 | 0 | FormatRound::do_format_round<Int128, | 3499 | 0 | FormatRound::MAX_FORMAT_LEN_DEC128V3()>( | 3500 | 0 | context, scale, whole_part, frac_part, decimal_places); | 3501 | |
| 3502 | 0 | result_column->insert_data(str.data, str.size); | 3503 | 0 | } | 3504 | 0 | } else { | 3505 | 0 | return Status::InternalError("Not supported input argument type {}", | 3506 | 0 | col_ptr->get_name()); | 3507 | 0 | } | 3508 | 2 | return Status::OK(); | 3509 | 2 | } |
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m Line | Count | Source | 3427 | 6 | ColumnPtr decimal_places_col_ptr, size_t input_rows_count) { | 3428 | 6 | const auto& arg_column_data_2 = | 3429 | 6 | assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data(); | 3430 | 6 | if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) { | 3431 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3432 | 0 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3433 | 0 | if (decimal_places < 0 || decimal_places > 1024) { | 3434 | 0 | return Status::InvalidArgument( | 3435 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3436 | 0 | decimal_places); | 3437 | 0 | } | 3438 | 0 | const auto& value = decimalv2_column->get_element(i); | 3439 | | // unified_frac_value has 3 digits | 3440 | 0 | auto unified_frac_value = value.frac_value() / 1000000; | 3441 | 0 | StringRef str = | 3442 | 0 | FormatRound::do_format_round<Int128, | 3443 | 0 | FormatRound::MAX_FORMAT_LEN_DEC128V2()>( | 3444 | 0 | context, 3, value.int_value(), unified_frac_value, decimal_places); | 3445 | |
| 3446 | 0 | result_column->insert_data(str.data, str.size); | 3447 | 0 | } | 3448 | 6 | } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) { | 3449 | 0 | const UInt32 scale = decimal32_column->get_scale(); | 3450 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3451 | 0 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3452 | 0 | if (decimal_places < 0 || decimal_places > 1024) { | 3453 | 0 | return Status::InvalidArgument( | 3454 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3455 | 0 | decimal_places); | 3456 | 0 | } | 3457 | 0 | const Int32& frac_part = decimal32_column->get_fractional_part(i); | 3458 | 0 | const Int32& whole_part = decimal32_column->get_intergral_part(i); | 3459 | 0 | StringRef str = | 3460 | 0 | FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>( | 3461 | 0 | context, scale, static_cast<Int64>(whole_part), | 3462 | 0 | static_cast<Int64>(frac_part), decimal_places); | 3463 | |
| 3464 | 0 | result_column->insert_data(str.data, str.size); | 3465 | 0 | } | 3466 | 6 | } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) { | 3467 | 6 | const UInt32 scale = decimal64_column->get_scale(); | 3468 | 15 | for (size_t i = 0; i < input_rows_count; i++) { | 3469 | 9 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3470 | 9 | if (decimal_places < 0 || decimal_places > 1024) { | 3471 | 0 | return Status::InvalidArgument( | 3472 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3473 | 0 | decimal_places); | 3474 | 0 | } | 3475 | 9 | const Int64& frac_part = decimal64_column->get_fractional_part(i); | 3476 | 9 | const Int64& whole_part = decimal64_column->get_intergral_part(i); | 3477 | | | 3478 | 9 | StringRef str = | 3479 | 9 | FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>( | 3480 | 9 | context, scale, whole_part, frac_part, decimal_places); | 3481 | | | 3482 | 9 | result_column->insert_data(str.data, str.size); | 3483 | 9 | } | 3484 | 6 | } else if (const auto* decimal128_column = | 3485 | 0 | check_and_get_column<ColumnDecimal128V3>(*col_ptr)) { | 3486 | 0 | const UInt32 scale = decimal128_column->get_scale(); | 3487 | 0 | for (size_t i = 0; i < input_rows_count; i++) { | 3488 | 0 | int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)]; | 3489 | 0 | if (decimal_places < 0 || decimal_places > 1024) { | 3490 | 0 | return Status::InvalidArgument( | 3491 | 0 | "The second argument is {}, it should be in range [0, 1024].", | 3492 | 0 | decimal_places); | 3493 | 0 | } | 3494 | 0 | const Int128& frac_part = decimal128_column->get_fractional_part(i); | 3495 | 0 | const Int128& whole_part = decimal128_column->get_intergral_part(i); | 3496 | |
| 3497 | 0 | StringRef str = | 3498 | 0 | FormatRound::do_format_round<Int128, | 3499 | 0 | FormatRound::MAX_FORMAT_LEN_DEC128V3()>( | 3500 | 0 | context, scale, whole_part, frac_part, decimal_places); | 3501 | |
| 3502 | 0 | result_column->insert_data(str.data, str.size); | 3503 | 0 | } | 3504 | 0 | } else { | 3505 | 0 | return Status::InternalError("Not supported input argument type {}", | 3506 | 0 | col_ptr->get_name()); | 3507 | 0 | } | 3508 | 6 | return Status::OK(); | 3509 | 6 | } |
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m |
3510 | | }; |
3511 | | |
3512 | | class FunctionStringLocatePos : public IFunction { |
3513 | | public: |
3514 | | static constexpr auto name = "locate"; |
3515 | 822 | static FunctionPtr create() { return std::make_shared<FunctionStringLocatePos>(); } |
3516 | 0 | String get_name() const override { return name; } |
3517 | 0 | size_t get_number_of_arguments() const override { return 3; } |
3518 | | |
3519 | 814 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
3520 | 814 | return std::make_shared<DataTypeInt32>(); |
3521 | 814 | } |
3522 | | |
3523 | 7 | DataTypes get_variadic_argument_types_impl() const override { |
3524 | 7 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
3525 | 7 | std::make_shared<DataTypeInt32>()}; |
3526 | 7 | } |
3527 | | |
3528 | 815 | bool is_variadic() const override { return true; } |
3529 | | |
3530 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
3531 | 576 | uint32_t result, size_t input_rows_count) const override { |
3532 | 576 | if (arguments.size() != 3) { |
3533 | 0 | return Status::InvalidArgument("Function {} requires 3 arguments, but got {}", |
3534 | 0 | get_name(), arguments.size()); |
3535 | 0 | } |
3536 | 576 | bool col_const[3]; |
3537 | 576 | ColumnPtr argument_columns[3]; |
3538 | 2.30k | for (int i = 0; i < 3; ++i) { |
3539 | 1.72k | std::tie(argument_columns[i], col_const[i]) = |
3540 | 1.72k | unpack_if_const(block.get_by_position(arguments[i]).column); |
3541 | 1.72k | } |
3542 | | |
3543 | 576 | const auto* col_left = assert_cast<const ColumnString*>(argument_columns[0].get()); |
3544 | 576 | const auto* col_right = assert_cast<const ColumnString*>(argument_columns[1].get()); |
3545 | 576 | const auto* col_pos = assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
3546 | | |
3547 | 576 | ColumnInt32::MutablePtr col_res = ColumnInt32::create(); |
3548 | 576 | auto& vec_res = col_res->get_data(); |
3549 | 576 | vec_res.resize(block.rows()); |
3550 | | |
3551 | 576 | const bool is_ascii = col_left->is_ascii() && col_right->is_ascii(); |
3552 | | |
3553 | 576 | if (col_const[0]) { |
3554 | 246 | std::visit( |
3555 | 246 | [&](auto is_ascii, auto str_const, auto pos_const) { |
3556 | 246 | scalar_search<is_ascii, str_const, pos_const>( |
3557 | 246 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, |
3558 | 246 | input_rows_count); |
3559 | 246 | }, _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 3555 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3556 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 3557 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 3558 | 22 | input_rows_count); | 3559 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Line | Count | Source | 3555 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3556 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 3557 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 3558 | 22 | input_rows_count); | 3559 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 3555 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3556 | 22 | scalar_search<is_ascii, str_const, pos_const>( | 3557 | 22 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 3558 | 22 | input_rows_count); | 3559 | 22 | }, |
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Line | Count | Source | 3555 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3556 | 60 | scalar_search<is_ascii, str_const, pos_const>( | 3557 | 60 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 3558 | 60 | input_rows_count); | 3559 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Line | Count | Source | 3555 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3556 | 60 | scalar_search<is_ascii, str_const, pos_const>( | 3557 | 60 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 3558 | 60 | input_rows_count); | 3559 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 3555 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3556 | 60 | scalar_search<is_ascii, str_const, pos_const>( | 3557 | 60 | col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res, | 3558 | 60 | input_rows_count); | 3559 | 60 | }, |
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ |
3560 | 246 | make_bool_variant(is_ascii), make_bool_variant(col_const[1]), |
3561 | 246 | make_bool_variant(col_const[2])); |
3562 | | |
3563 | 330 | } else { |
3564 | 330 | std::visit( |
3565 | 330 | [&](auto is_ascii, auto str_const, auto pos_const) { |
3566 | 330 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, |
3567 | 330 | col_pos->get_data(), vec_res, |
3568 | 330 | input_rows_count); |
3569 | 330 | }, _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 3565 | 23 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3566 | 23 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 3567 | 23 | col_pos->get_data(), vec_res, | 3568 | 23 | input_rows_count); | 3569 | 23 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_ Line | Count | Source | 3565 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3566 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 3567 | 22 | col_pos->get_data(), vec_res, | 3568 | 22 | input_rows_count); | 3569 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_ Line | Count | Source | 3565 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3566 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 3567 | 22 | col_pos->get_data(), vec_res, | 3568 | 22 | input_rows_count); | 3569 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_ Line | Count | Source | 3565 | 22 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3566 | 22 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 3567 | 22 | col_pos->get_data(), vec_res, | 3568 | 22 | input_rows_count); | 3569 | 22 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_ Line | Count | Source | 3565 | 61 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3566 | 61 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 3567 | 61 | col_pos->get_data(), vec_res, | 3568 | 61 | input_rows_count); | 3569 | 61 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_ Line | Count | Source | 3565 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3566 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 3567 | 60 | col_pos->get_data(), vec_res, | 3568 | 60 | input_rows_count); | 3569 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_ Line | Count | Source | 3565 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3566 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 3567 | 60 | col_pos->get_data(), vec_res, | 3568 | 60 | input_rows_count); | 3569 | 60 | }, |
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_ Line | Count | Source | 3565 | 60 | [&](auto is_ascii, auto str_const, auto pos_const) { | 3566 | 60 | vector_search<is_ascii, str_const, pos_const>(col_left, col_right, | 3567 | 60 | col_pos->get_data(), vec_res, | 3568 | 60 | input_rows_count); | 3569 | 60 | }, |
|
3570 | 330 | make_bool_variant(is_ascii), make_bool_variant(col_const[1]), |
3571 | 330 | make_bool_variant(col_const[2])); |
3572 | 330 | } |
3573 | 576 | block.replace_by_position(result, std::move(col_res)); |
3574 | 576 | return Status::OK(); |
3575 | 576 | } |
3576 | | |
3577 | | private: |
3578 | | template <bool is_ascii, bool str_const, bool pos_const> |
3579 | | void scalar_search(const StringRef& ldata, const ColumnString* col_right, |
3580 | | const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res, |
3581 | 246 | size_t size) const { |
3582 | 246 | res.resize(size); |
3583 | 246 | StringRef substr(ldata.data, ldata.size); |
3584 | 246 | StringSearch search {&substr}; |
3585 | | |
3586 | 492 | for (int i = 0; i < size; ++i) { |
3587 | 246 | res[i] = locate_pos<is_ascii>(substr, |
3588 | 246 | col_right->get_data_at(index_check_const<str_const>(i)), |
3589 | 246 | search, posdata[index_check_const<pos_const>(i)]); |
3590 | 246 | } |
3591 | 246 | } _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 3581 | 22 | size_t size) const { | 3582 | 22 | res.resize(size); | 3583 | 22 | StringRef substr(ldata.data, ldata.size); | 3584 | 22 | StringSearch search {&substr}; | 3585 | | | 3586 | 44 | for (int i = 0; i < size; ++i) { | 3587 | 22 | res[i] = locate_pos<is_ascii>(substr, | 3588 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 3589 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 3590 | 22 | } | 3591 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 3581 | 22 | size_t size) const { | 3582 | 22 | res.resize(size); | 3583 | 22 | StringRef substr(ldata.data, ldata.size); | 3584 | 22 | StringSearch search {&substr}; | 3585 | | | 3586 | 44 | for (int i = 0; i < size; ++i) { | 3587 | 22 | res[i] = locate_pos<is_ascii>(substr, | 3588 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 3589 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 3590 | 22 | } | 3591 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 3581 | 22 | size_t size) const { | 3582 | 22 | res.resize(size); | 3583 | 22 | StringRef substr(ldata.data, ldata.size); | 3584 | 22 | StringSearch search {&substr}; | 3585 | | | 3586 | 44 | for (int i = 0; i < size; ++i) { | 3587 | 22 | res[i] = locate_pos<is_ascii>(substr, | 3588 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 3589 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 3590 | 22 | } | 3591 | 22 | } |
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 3581 | 60 | size_t size) const { | 3582 | 60 | res.resize(size); | 3583 | 60 | StringRef substr(ldata.data, ldata.size); | 3584 | 60 | StringSearch search {&substr}; | 3585 | | | 3586 | 120 | for (int i = 0; i < size; ++i) { | 3587 | 60 | res[i] = locate_pos<is_ascii>(substr, | 3588 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 3589 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 3590 | 60 | } | 3591 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 3581 | 60 | size_t size) const { | 3582 | 60 | res.resize(size); | 3583 | 60 | StringRef substr(ldata.data, ldata.size); | 3584 | 60 | StringSearch search {&substr}; | 3585 | | | 3586 | 120 | for (int i = 0; i < size; ++i) { | 3587 | 60 | res[i] = locate_pos<is_ascii>(substr, | 3588 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 3589 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 3590 | 60 | } | 3591 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m Line | Count | Source | 3581 | 60 | size_t size) const { | 3582 | 60 | res.resize(size); | 3583 | 60 | StringRef substr(ldata.data, ldata.size); | 3584 | 60 | StringSearch search {&substr}; | 3585 | | | 3586 | 120 | for (int i = 0; i < size; ++i) { | 3587 | 60 | res[i] = locate_pos<is_ascii>(substr, | 3588 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 3589 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 3590 | 60 | } | 3591 | 60 | } |
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m |
3592 | | |
3593 | | template <bool is_ascii, bool str_const, bool pos_const> |
3594 | | void vector_search(const ColumnString* col_left, const ColumnString* col_right, |
3595 | | const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res, |
3596 | 330 | size_t size) const { |
3597 | 330 | res.resize(size); |
3598 | 330 | StringSearch search; |
3599 | 774 | for (int i = 0; i < size; ++i) { |
3600 | 444 | StringRef substr = col_left->get_data_at(i); |
3601 | 444 | search.set_pattern(&substr); |
3602 | 444 | res[i] = locate_pos<is_ascii>(substr, |
3603 | 444 | col_right->get_data_at(index_check_const<str_const>(i)), |
3604 | 444 | search, posdata[index_check_const<pos_const>(i)]); |
3605 | 444 | } |
3606 | 330 | } _ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 3596 | 23 | size_t size) const { | 3597 | 23 | res.resize(size); | 3598 | 23 | StringSearch search; | 3599 | 71 | for (int i = 0; i < size; ++i) { | 3600 | 48 | StringRef substr = col_left->get_data_at(i); | 3601 | 48 | search.set_pattern(&substr); | 3602 | 48 | res[i] = locate_pos<is_ascii>(substr, | 3603 | 48 | col_right->get_data_at(index_check_const<str_const>(i)), | 3604 | 48 | search, posdata[index_check_const<pos_const>(i)]); | 3605 | 48 | } | 3606 | 23 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 3596 | 22 | size_t size) const { | 3597 | 22 | res.resize(size); | 3598 | 22 | StringSearch search; | 3599 | 44 | for (int i = 0; i < size; ++i) { | 3600 | 22 | StringRef substr = col_left->get_data_at(i); | 3601 | 22 | search.set_pattern(&substr); | 3602 | 22 | res[i] = locate_pos<is_ascii>(substr, | 3603 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 3604 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 3605 | 22 | } | 3606 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 3596 | 22 | size_t size) const { | 3597 | 22 | res.resize(size); | 3598 | 22 | StringSearch search; | 3599 | 44 | for (int i = 0; i < size; ++i) { | 3600 | 22 | StringRef substr = col_left->get_data_at(i); | 3601 | 22 | search.set_pattern(&substr); | 3602 | 22 | res[i] = locate_pos<is_ascii>(substr, | 3603 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 3604 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 3605 | 22 | } | 3606 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 3596 | 22 | size_t size) const { | 3597 | 22 | res.resize(size); | 3598 | 22 | StringSearch search; | 3599 | 44 | for (int i = 0; i < size; ++i) { | 3600 | 22 | StringRef substr = col_left->get_data_at(i); | 3601 | 22 | search.set_pattern(&substr); | 3602 | 22 | res[i] = locate_pos<is_ascii>(substr, | 3603 | 22 | col_right->get_data_at(index_check_const<str_const>(i)), | 3604 | 22 | search, posdata[index_check_const<pos_const>(i)]); | 3605 | 22 | } | 3606 | 22 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 3596 | 61 | size_t size) const { | 3597 | 61 | res.resize(size); | 3598 | 61 | StringSearch search; | 3599 | 211 | for (int i = 0; i < size; ++i) { | 3600 | 150 | StringRef substr = col_left->get_data_at(i); | 3601 | 150 | search.set_pattern(&substr); | 3602 | 150 | res[i] = locate_pos<is_ascii>(substr, | 3603 | 150 | col_right->get_data_at(index_check_const<str_const>(i)), | 3604 | 150 | search, posdata[index_check_const<pos_const>(i)]); | 3605 | 150 | } | 3606 | 61 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 3596 | 60 | size_t size) const { | 3597 | 60 | res.resize(size); | 3598 | 60 | StringSearch search; | 3599 | 120 | for (int i = 0; i < size; ++i) { | 3600 | 60 | StringRef substr = col_left->get_data_at(i); | 3601 | 60 | search.set_pattern(&substr); | 3602 | 60 | res[i] = locate_pos<is_ascii>(substr, | 3603 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 3604 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 3605 | 60 | } | 3606 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 3596 | 60 | size_t size) const { | 3597 | 60 | res.resize(size); | 3598 | 60 | StringSearch search; | 3599 | 120 | for (int i = 0; i < size; ++i) { | 3600 | 60 | StringRef substr = col_left->get_data_at(i); | 3601 | 60 | search.set_pattern(&substr); | 3602 | 60 | res[i] = locate_pos<is_ascii>(substr, | 3603 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 3604 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 3605 | 60 | } | 3606 | 60 | } |
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m Line | Count | Source | 3596 | 60 | size_t size) const { | 3597 | 60 | res.resize(size); | 3598 | 60 | StringSearch search; | 3599 | 120 | for (int i = 0; i < size; ++i) { | 3600 | 60 | StringRef substr = col_left->get_data_at(i); | 3601 | 60 | search.set_pattern(&substr); | 3602 | 60 | res[i] = locate_pos<is_ascii>(substr, | 3603 | 60 | col_right->get_data_at(index_check_const<str_const>(i)), | 3604 | 60 | search, posdata[index_check_const<pos_const>(i)]); | 3605 | 60 | } | 3606 | 60 | } |
|
3607 | | |
3608 | | template <bool is_ascii> |
3609 | 690 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { |
3610 | 690 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { |
3611 | | // BEHAVIOR COMPATIBLE WITH MYSQL |
3612 | | // locate('','') locate('','',1) locate('','',2) |
3613 | | // 1 1 0 |
3614 | 11 | return 1; |
3615 | 11 | } |
3616 | 679 | if (is_ascii) { |
3617 | 499 | return locate_pos_ascii(substr, str, search, start_pos); |
3618 | 499 | } else { |
3619 | 180 | return locate_pos_utf8(substr, str, search, start_pos); |
3620 | 180 | } |
3621 | 679 | } _ZNK5doris23FunctionStringLocatePos10locate_posILb0EEEiNS_9StringRefES2_RNS_12StringSearchEi Line | Count | Source | 3609 | 180 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { | 3610 | 180 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { | 3611 | | // BEHAVIOR COMPATIBLE WITH MYSQL | 3612 | | // locate('','') locate('','',1) locate('','',2) | 3613 | | // 1 1 0 | 3614 | 0 | return 1; | 3615 | 0 | } | 3616 | 180 | if (is_ascii) { | 3617 | 0 | return locate_pos_ascii(substr, str, search, start_pos); | 3618 | 180 | } else { | 3619 | 180 | return locate_pos_utf8(substr, str, search, start_pos); | 3620 | 180 | } | 3621 | 180 | } |
_ZNK5doris23FunctionStringLocatePos10locate_posILb1EEEiNS_9StringRefES2_RNS_12StringSearchEi Line | Count | Source | 3609 | 510 | int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const { | 3610 | 510 | if (str.size == 0 && substr.size == 0 && start_pos == 1) { | 3611 | | // BEHAVIOR COMPATIBLE WITH MYSQL | 3612 | | // locate('','') locate('','',1) locate('','',2) | 3613 | | // 1 1 0 | 3614 | 11 | return 1; | 3615 | 11 | } | 3616 | 499 | if (is_ascii) { | 3617 | 499 | return locate_pos_ascii(substr, str, search, start_pos); | 3618 | 499 | } else { | 3619 | 0 | return locate_pos_utf8(substr, str, search, start_pos); | 3620 | 0 | } | 3621 | 499 | } |
|
3622 | | |
3623 | | int locate_pos_utf8(StringRef substr, StringRef str, StringSearch& search, |
3624 | 180 | int start_pos) const { |
3625 | 180 | std::vector<size_t> index; |
3626 | 180 | size_t char_len = simd::VStringFunctions::get_char_len(str.data, str.size, index); |
3627 | 180 | if (start_pos <= 0 || start_pos > char_len) { |
3628 | 43 | return 0; |
3629 | 43 | } |
3630 | 137 | if (substr.size == 0) { |
3631 | 17 | return start_pos; |
3632 | 17 | } |
3633 | | // Input start_pos starts from 1. |
3634 | 120 | StringRef adjusted_str(str.data + index[start_pos - 1], str.size - index[start_pos - 1]); |
3635 | 120 | int32_t match_pos = search.search(&adjusted_str); |
3636 | 120 | if (match_pos >= 0) { |
3637 | | // Hive returns the position in the original string starting from 1. |
3638 | 104 | return start_pos + simd::VStringFunctions::get_char_len(adjusted_str.data, match_pos); |
3639 | 104 | } else { |
3640 | 16 | return 0; |
3641 | 16 | } |
3642 | 120 | } |
3643 | | |
3644 | | int locate_pos_ascii(StringRef substr, StringRef str, StringSearch& search, |
3645 | 499 | int start_pos) const { |
3646 | 499 | if (start_pos <= 0 || start_pos > str.size) { |
3647 | 367 | return 0; |
3648 | 367 | } |
3649 | 132 | if (substr.size == 0) { |
3650 | 36 | return start_pos; |
3651 | 36 | } |
3652 | | // Input start_pos starts from 1. |
3653 | 96 | StringRef adjusted_str(str.data + start_pos - 1, str.size - start_pos + 1); |
3654 | 96 | int32_t match_pos = search.search(&adjusted_str); |
3655 | 96 | if (match_pos >= 0) { |
3656 | | // Hive returns the position in the original string starting from 1. |
3657 | 40 | return start_pos + match_pos; |
3658 | 56 | } else { |
3659 | 56 | return 0; |
3660 | 56 | } |
3661 | 96 | } |
3662 | | }; |
3663 | | |
3664 | | struct ReplaceImpl { |
3665 | | static constexpr auto name = "replace"; |
3666 | | }; |
3667 | | |
3668 | | struct ReplaceEmptyImpl { |
3669 | | static constexpr auto name = "replace_empty"; |
3670 | | }; |
3671 | | |
3672 | | template <typename Impl, bool empty> |
3673 | | class FunctionReplace : public IFunction { |
3674 | | public: |
3675 | | static constexpr auto name = Impl::name; |
3676 | 3.42k | static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }_ZN5doris15FunctionReplaceINS_11ReplaceImplELb1EE6createEv Line | Count | Source | 3676 | 1.90k | static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); } |
_ZN5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE6createEv Line | Count | Source | 3676 | 1.52k | static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); } |
|
3677 | 2 | String get_name() const override { return name; }_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE8get_nameB5cxx11Ev Line | Count | Source | 3677 | 1 | String get_name() const override { return name; } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE8get_nameB5cxx11Ev Line | Count | Source | 3677 | 1 | String get_name() const override { return name; } |
|
3678 | 3.40k | size_t get_number_of_arguments() const override { return 3; }_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE23get_number_of_argumentsEv Line | Count | Source | 3678 | 1.89k | size_t get_number_of_arguments() const override { return 3; } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE23get_number_of_argumentsEv Line | Count | Source | 3678 | 1.51k | size_t get_number_of_arguments() const override { return 3; } |
|
3679 | | |
3680 | 3.40k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
3681 | 3.40k | return std::make_shared<DataTypeString>(); |
3682 | 3.40k | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 3680 | 1.89k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 3681 | 1.89k | return std::make_shared<DataTypeString>(); | 3682 | 1.89k | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 3680 | 1.51k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 3681 | 1.51k | return std::make_shared<DataTypeString>(); | 3682 | 1.51k | } |
|
3683 | | |
3684 | 14 | DataTypes get_variadic_argument_types_impl() const override { |
3685 | 14 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
3686 | 14 | std::make_shared<DataTypeString>()}; |
3687 | 14 | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE32get_variadic_argument_types_implEv Line | Count | Source | 3684 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 3685 | 7 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 3686 | 7 | std::make_shared<DataTypeString>()}; | 3687 | 7 | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE32get_variadic_argument_types_implEv Line | Count | Source | 3684 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 3685 | 7 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), | 3686 | 7 | std::make_shared<DataTypeString>()}; | 3687 | 7 | } |
|
3688 | | |
3689 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
3690 | 2.12k | uint32_t result, size_t input_rows_count) const override { |
3691 | | // We need a local variable to hold a reference to the converted column. |
3692 | | // So that the converted column will not be released before we use it. |
3693 | 2.12k | ColumnPtr col[3]; |
3694 | 2.12k | bool col_const[3]; |
3695 | 8.49k | for (size_t i = 0; i < 3; ++i) { |
3696 | 6.37k | std::tie(col[i], col_const[i]) = |
3697 | 6.37k | unpack_if_const(block.get_by_position(arguments[i]).column); |
3698 | 6.37k | } |
3699 | | |
3700 | 2.12k | const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get()); |
3701 | 2.12k | const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get()); |
3702 | 2.12k | const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get()); |
3703 | | |
3704 | 2.12k | ColumnString::MutablePtr col_res = ColumnString::create(); |
3705 | | |
3706 | 2.12k | std::visit( |
3707 | 2.12k | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { |
3708 | 7.54k | for (int i = 0; i < input_rows_count; ++i) { |
3709 | 5.42k | StringRef origin_str = |
3710 | 5.42k | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); |
3711 | 5.42k | StringRef old_str = |
3712 | 5.42k | col_old_str->get_data_at(index_check_const<old_str_const>(i)); |
3713 | 5.42k | StringRef new_str = |
3714 | 5.42k | col_new_str->get_data_at(index_check_const<new_str_const>(i)); |
3715 | | |
3716 | 5.42k | std::string result = |
3717 | 5.42k | replace(origin_str.to_string(), old_str.to_string_view(), |
3718 | 5.42k | new_str.to_string_view()); |
3719 | | |
3720 | 5.42k | col_res->insert_data(result.data(), result.length()); |
3721 | 5.42k | } |
3722 | 2.12k | }, _ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 3707 | 126 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 467 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 341 | StringRef origin_str = | 3710 | 341 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 341 | StringRef old_str = | 3712 | 341 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 341 | StringRef new_str = | 3714 | 341 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 341 | std::string result = | 3717 | 341 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 341 | new_str.to_string_view()); | 3719 | | | 3720 | 341 | col_res->insert_data(result.data(), result.length()); | 3721 | 341 | } | 3722 | 126 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 3707 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 125 | StringRef origin_str = | 3710 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 125 | StringRef old_str = | 3712 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 125 | StringRef new_str = | 3714 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 125 | std::string result = | 3717 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 125 | new_str.to_string_view()); | 3719 | | | 3720 | 125 | col_res->insert_data(result.data(), result.length()); | 3721 | 125 | } | 3722 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 3707 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 125 | StringRef origin_str = | 3710 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 125 | StringRef old_str = | 3712 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 125 | StringRef new_str = | 3714 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 125 | std::string result = | 3717 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 125 | new_str.to_string_view()); | 3719 | | | 3720 | 125 | col_res->insert_data(result.data(), result.length()); | 3721 | 125 | } | 3722 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 3707 | 497 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 3.86k | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 3.36k | StringRef origin_str = | 3710 | 3.36k | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 3.36k | StringRef old_str = | 3712 | 3.36k | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 3.36k | StringRef new_str = | 3714 | 3.36k | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 3.36k | std::string result = | 3717 | 3.36k | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 3.36k | new_str.to_string_view()); | 3719 | | | 3720 | 3.36k | col_res->insert_data(result.data(), result.length()); | 3721 | 3.36k | } | 3722 | 497 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 3707 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 125 | StringRef origin_str = | 3710 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 125 | StringRef old_str = | 3712 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 125 | StringRef new_str = | 3714 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 125 | std::string result = | 3717 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 125 | new_str.to_string_view()); | 3719 | | | 3720 | 125 | col_res->insert_data(result.data(), result.length()); | 3721 | 125 | } | 3722 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 3707 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 125 | StringRef origin_str = | 3710 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 125 | StringRef old_str = | 3712 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 125 | StringRef new_str = | 3714 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 125 | std::string result = | 3717 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 125 | new_str.to_string_view()); | 3719 | | | 3720 | 125 | col_res->insert_data(result.data(), result.length()); | 3721 | 125 | } | 3722 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 3707 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 125 | StringRef origin_str = | 3710 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 125 | StringRef old_str = | 3712 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 125 | StringRef new_str = | 3714 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 125 | std::string result = | 3717 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 125 | new_str.to_string_view()); | 3719 | | | 3720 | 125 | col_res->insert_data(result.data(), result.length()); | 3721 | 125 | } | 3722 | 125 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ _ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 3707 | 126 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 467 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 341 | StringRef origin_str = | 3710 | 341 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 341 | StringRef old_str = | 3712 | 341 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 341 | StringRef new_str = | 3714 | 341 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 341 | std::string result = | 3717 | 341 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 341 | new_str.to_string_view()); | 3719 | | | 3720 | 341 | col_res->insert_data(result.data(), result.length()); | 3721 | 341 | } | 3722 | 126 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 3707 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 125 | StringRef origin_str = | 3710 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 125 | StringRef old_str = | 3712 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 125 | StringRef new_str = | 3714 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 125 | std::string result = | 3717 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 125 | new_str.to_string_view()); | 3719 | | | 3720 | 125 | col_res->insert_data(result.data(), result.length()); | 3721 | 125 | } | 3722 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 3707 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 125 | StringRef origin_str = | 3710 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 125 | StringRef old_str = | 3712 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 125 | StringRef new_str = | 3714 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 125 | std::string result = | 3717 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 125 | new_str.to_string_view()); | 3719 | | | 3720 | 125 | col_res->insert_data(result.data(), result.length()); | 3721 | 125 | } | 3722 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 3707 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 125 | StringRef origin_str = | 3710 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 125 | StringRef old_str = | 3712 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 125 | StringRef new_str = | 3714 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 125 | std::string result = | 3717 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 125 | new_str.to_string_view()); | 3719 | | | 3720 | 125 | col_res->insert_data(result.data(), result.length()); | 3721 | 125 | } | 3722 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 3707 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 125 | StringRef origin_str = | 3710 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 125 | StringRef old_str = | 3712 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 125 | StringRef new_str = | 3714 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 125 | std::string result = | 3717 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 125 | new_str.to_string_view()); | 3719 | | | 3720 | 125 | col_res->insert_data(result.data(), result.length()); | 3721 | 125 | } | 3722 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 3707 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 125 | StringRef origin_str = | 3710 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 125 | StringRef old_str = | 3712 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 125 | StringRef new_str = | 3714 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 125 | std::string result = | 3717 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 125 | new_str.to_string_view()); | 3719 | | | 3720 | 125 | col_res->insert_data(result.data(), result.length()); | 3721 | 125 | } | 3722 | 125 | }, |
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 3707 | 125 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 250 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 125 | StringRef origin_str = | 3710 | 125 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 125 | StringRef old_str = | 3712 | 125 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 125 | StringRef new_str = | 3714 | 125 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 125 | std::string result = | 3717 | 125 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 125 | new_str.to_string_view()); | 3719 | | | 3720 | 125 | col_res->insert_data(result.data(), result.length()); | 3721 | 125 | } | 3722 | 125 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ |
3723 | 2.12k | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
3724 | 2.12k | make_bool_variant(col_const[2])); |
3725 | | |
3726 | 2.12k | block.replace_by_position(result, std::move(col_res)); |
3727 | 2.12k | return Status::OK(); |
3728 | 2.12k | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 3690 | 1.24k | uint32_t result, size_t input_rows_count) const override { | 3691 | | // We need a local variable to hold a reference to the converted column. | 3692 | | // So that the converted column will not be released before we use it. | 3693 | 1.24k | ColumnPtr col[3]; | 3694 | 1.24k | bool col_const[3]; | 3695 | 4.99k | for (size_t i = 0; i < 3; ++i) { | 3696 | 3.74k | std::tie(col[i], col_const[i]) = | 3697 | 3.74k | unpack_if_const(block.get_by_position(arguments[i]).column); | 3698 | 3.74k | } | 3699 | | | 3700 | 1.24k | const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get()); | 3701 | 1.24k | const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get()); | 3702 | 1.24k | const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get()); | 3703 | | | 3704 | 1.24k | ColumnString::MutablePtr col_res = ColumnString::create(); | 3705 | | | 3706 | 1.24k | std::visit( | 3707 | 1.24k | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 1.24k | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 1.24k | StringRef origin_str = | 3710 | 1.24k | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 1.24k | StringRef old_str = | 3712 | 1.24k | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 1.24k | StringRef new_str = | 3714 | 1.24k | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 1.24k | std::string result = | 3717 | 1.24k | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 1.24k | new_str.to_string_view()); | 3719 | | | 3720 | 1.24k | col_res->insert_data(result.data(), result.length()); | 3721 | 1.24k | } | 3722 | 1.24k | }, | 3723 | 1.24k | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 3724 | 1.24k | 
make_bool_variant(col_const[2])); | 3725 | | | 3726 | 1.24k | block.replace_by_position(result, std::move(col_res)); | 3727 | 1.24k | return Status::OK(); | 3728 | 1.24k | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 3690 | 876 | uint32_t result, size_t input_rows_count) const override { | 3691 | | // We need a local variable to hold a reference to the converted column. | 3692 | | // So that the converted column will not be released before we use it. | 3693 | 876 | ColumnPtr col[3]; | 3694 | 876 | bool col_const[3]; | 3695 | 3.50k | for (size_t i = 0; i < 3; ++i) { | 3696 | 2.62k | std::tie(col[i], col_const[i]) = | 3697 | 2.62k | unpack_if_const(block.get_by_position(arguments[i]).column); | 3698 | 2.62k | } | 3699 | | | 3700 | 876 | const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get()); | 3701 | 876 | const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get()); | 3702 | 876 | const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get()); | 3703 | | | 3704 | 876 | ColumnString::MutablePtr col_res = ColumnString::create(); | 3705 | | | 3706 | 876 | std::visit( | 3707 | 876 | [&](auto origin_str_const, auto old_str_const, auto new_str_const) { | 3708 | 876 | for (int i = 0; i < input_rows_count; ++i) { | 3709 | 876 | StringRef origin_str = | 3710 | 876 | col_origin_str->get_data_at(index_check_const<origin_str_const>(i)); | 3711 | 876 | StringRef old_str = | 3712 | 876 | col_old_str->get_data_at(index_check_const<old_str_const>(i)); | 3713 | 876 | StringRef new_str = | 3714 | 876 | col_new_str->get_data_at(index_check_const<new_str_const>(i)); | 3715 | | | 3716 | 876 | std::string result = | 3717 | 876 | replace(origin_str.to_string(), old_str.to_string_view(), | 3718 | 876 | new_str.to_string_view()); | 3719 | | | 3720 | 876 | col_res->insert_data(result.data(), result.length()); | 3721 | 876 | } | 3722 | 876 | }, | 3723 | 876 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), | 3724 | 876 | make_bool_variant(col_const[2])); | 3725 | | | 3726 | 876 | 
block.replace_by_position(result, std::move(col_res)); | 3727 | 876 | return Status::OK(); | 3728 | 876 | } |
|
3729 | | |
3730 | | private: |
3731 | 5.42k | std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const { |
3732 | 5.42k | if (old_str.empty()) { |
3733 | 494 | if constexpr (empty) { |
3734 | 247 | return str; |
3735 | 247 | } else { |
3736 | | // Different from "Replace" only when the search string is empty. |
3737 | | // it will insert `new_str` in front of every character and at the end of the old str. |
3738 | 247 | if (new_str.empty()) { |
3739 | 59 | return str; |
3740 | 59 | } |
3741 | 188 | if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) { |
3742 | 188 | std::string result; |
3743 | 188 | ColumnString::check_chars_length( |
3744 | 188 | str.length() * (new_str.length() + 1) + new_str.length(), 0); |
3745 | 188 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); |
3746 | 648 | for (char c : str) { |
3747 | 648 | result += new_str; |
3748 | 648 | result += c; |
3749 | 648 | } |
3750 | 188 | result += new_str; |
3751 | 188 | return result; |
3752 | 188 | } else { |
3753 | 0 | std::string result; |
3754 | 0 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); |
3755 | 0 | for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) { |
3756 | 0 | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]]; |
3757 | 0 | result += new_str; |
3758 | 0 | result.append(&str[i], utf8_char_len); |
3759 | 0 | } |
3760 | 0 | result += new_str; |
3761 | 0 | ColumnString::check_chars_length(result.size(), 0); |
3762 | 0 | return result; |
3763 | 0 | } |
3764 | 188 | } |
3765 | 4.93k | } else { |
3766 | 4.93k | std::string::size_type pos = 0; |
3767 | 4.93k | std::string::size_type oldLen = old_str.size(); |
3768 | 4.93k | std::string::size_type newLen = new_str.size(); |
3769 | 6.06k | while ((pos = str.find(old_str, pos)) != std::string::npos) { |
3770 | 1.13k | str.replace(pos, oldLen, new_str); |
3771 | 1.13k | pos += newLen; |
3772 | 1.13k | } |
3773 | 4.93k | return str; |
3774 | 4.93k | } |
3775 | 5.42k | } _ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_ Line | Count | Source | 3731 | 4.33k | std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const { | 3732 | 4.33k | if (old_str.empty()) { | 3733 | 247 | if constexpr (empty) { | 3734 | 247 | return str; | 3735 | | } else { | 3736 | | // Different from "Replace" only when the search string is empty. | 3737 | | // it will insert `new_str` in front of every character and at the end of the old str. | 3738 | | if (new_str.empty()) { | 3739 | | return str; | 3740 | | } | 3741 | | if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) { | 3742 | | std::string result; | 3743 | | ColumnString::check_chars_length( | 3744 | | str.length() * (new_str.length() + 1) + new_str.length(), 0); | 3745 | | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 3746 | | for (char c : str) { | 3747 | | result += new_str; | 3748 | | result += c; | 3749 | | } | 3750 | | result += new_str; | 3751 | | return result; | 3752 | | } else { | 3753 | | std::string result; | 3754 | | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 3755 | | for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) { | 3756 | | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]]; | 3757 | | result += new_str; | 3758 | | result.append(&str[i], utf8_char_len); | 3759 | | } | 3760 | | result += new_str; | 3761 | | ColumnString::check_chars_length(result.size(), 0); | 3762 | | return result; | 3763 | | } | 3764 | | } | 3765 | 4.08k | } else { | 3766 | 4.08k | std::string::size_type pos = 0; | 3767 | 4.08k | std::string::size_type oldLen = old_str.size(); | 3768 | 4.08k | std::string::size_type newLen = new_str.size(); | 3769 | 4.89k | while ((pos = str.find(old_str, pos)) != std::string::npos) { | 3770 | 808 | str.replace(pos, oldLen, new_str); | 
3771 | 808 | pos += newLen; | 3772 | 808 | } | 3773 | 4.08k | return str; | 3774 | 4.08k | } | 3775 | 4.33k | } |
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_ Line | Count | Source | 3731 | 1.09k | std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const { | 3732 | 1.09k | if (old_str.empty()) { | 3733 | | if constexpr (empty) { | 3734 | | return str; | 3735 | 247 | } else { | 3736 | | // Different from "Replace" only when the search string is empty. | 3737 | | // it will insert `new_str` in front of every character and at the end of the old str. | 3738 | 247 | if (new_str.empty()) { | 3739 | 59 | return str; | 3740 | 59 | } | 3741 | 188 | if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) { | 3742 | 188 | std::string result; | 3743 | 188 | ColumnString::check_chars_length( | 3744 | 188 | str.length() * (new_str.length() + 1) + new_str.length(), 0); | 3745 | 188 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 3746 | 648 | for (char c : str) { | 3747 | 648 | result += new_str; | 3748 | 648 | result += c; | 3749 | 648 | } | 3750 | 188 | result += new_str; | 3751 | 188 | return result; | 3752 | 188 | } else { | 3753 | 0 | std::string result; | 3754 | 0 | result.reserve(str.length() * (new_str.length() + 1) + new_str.length()); | 3755 | 0 | for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) { | 3756 | 0 | utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]]; | 3757 | 0 | result += new_str; | 3758 | 0 | result.append(&str[i], utf8_char_len); | 3759 | 0 | } | 3760 | 0 | result += new_str; | 3761 | 0 | ColumnString::check_chars_length(result.size(), 0); | 3762 | 0 | return result; | 3763 | 0 | } | 3764 | 188 | } | 3765 | 844 | } else { | 3766 | 844 | std::string::size_type pos = 0; | 3767 | 844 | std::string::size_type oldLen = old_str.size(); | 3768 | 844 | std::string::size_type newLen = new_str.size(); | 3769 | 1.17k | while ((pos = str.find(old_str, pos)) != 
std::string::npos) { | 3770 | 328 | str.replace(pos, oldLen, new_str); | 3771 | 328 | pos += newLen; | 3772 | 328 | } | 3773 | 844 | return str; | 3774 | 844 | } | 3775 | 1.09k | } |
|
3776 | | }; |
3777 | | |
3778 | | struct ReverseImpl { |
3779 | | static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
3780 | 53 | ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) { |
3781 | 53 | auto rows_count = offsets.size(); |
3782 | 53 | res_offsets.resize(rows_count); |
3783 | 53 | res_data.reserve(data.size()); |
3784 | 158 | for (ssize_t i = 0; i < rows_count; ++i) { |
3785 | 105 | auto src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
3786 | 105 | int64_t src_len = offsets[i] - offsets[i - 1]; |
3787 | 105 | std::string dst; |
3788 | 105 | dst.resize(src_len); |
3789 | 105 | simd::VStringFunctions::reverse(StringRef((uint8_t*)src_str, src_len), &dst); |
3790 | 105 | StringOP::push_value_string(std::string_view(dst.data(), src_len), i, res_data, |
3791 | 105 | res_offsets); |
3792 | 105 | } |
3793 | 53 | return Status::OK(); |
3794 | 53 | } |
3795 | | }; |
3796 | | |
3797 | | template <typename Impl> |
3798 | | class FunctionSubReplace : public IFunction { |
3799 | | public: |
3800 | | static constexpr auto name = "sub_replace"; |
3801 | | |
3802 | 16 | static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }_ZN5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE6createEv Line | Count | Source | 3802 | 8 | static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); } |
_ZN5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE6createEv Line | Count | Source | 3802 | 8 | static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); } |
|
3803 | | |
3804 | 0 | String get_name() const override { return name; }Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE8get_nameB5cxx11Ev Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE8get_nameB5cxx11Ev |
3805 | | |
3806 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
3807 | 0 | return make_nullable(std::make_shared<DataTypeString>()); |
3808 | 0 | } Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE |
3809 | | |
3810 | 2 | bool is_variadic() const override { return true; }_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE11is_variadicEv Line | Count | Source | 3810 | 1 | bool is_variadic() const override { return true; } |
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE11is_variadicEv Line | Count | Source | 3810 | 1 | bool is_variadic() const override { return true; } |
|
3811 | | |
3812 | 14 | DataTypes get_variadic_argument_types_impl() const override { |
3813 | 14 | return Impl::get_variadic_argument_types(); |
3814 | 14 | } _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE32get_variadic_argument_types_implEv Line | Count | Source | 3812 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 3813 | 7 | return Impl::get_variadic_argument_types(); | 3814 | 7 | } |
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE32get_variadic_argument_types_implEv Line | Count | Source | 3812 | 7 | DataTypes get_variadic_argument_types_impl() const override { | 3813 | 7 | return Impl::get_variadic_argument_types(); | 3814 | 7 | } |
|
3815 | | |
3816 | 0 | size_t get_number_of_arguments() const override { |
3817 | 0 | return get_variadic_argument_types_impl().size(); |
3818 | 0 | } Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE23get_number_of_argumentsEv Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE23get_number_of_argumentsEv |
3819 | | |
3820 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
3821 | 0 | uint32_t result, size_t input_rows_count) const override { |
3822 | 0 | return Impl::execute_impl(context, block, arguments, result, input_rows_count); |
3823 | 0 | } Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm |
3824 | | }; |
3825 | | |
3826 | | struct SubReplaceImpl { |
3827 | | static Status replace_execute(Block& block, const ColumnNumbers& arguments, uint32_t result, |
3828 | 1 | size_t input_rows_count) { |
3829 | 1 | auto res_column = ColumnString::create(); |
3830 | 1 | auto* result_column = assert_cast<ColumnString*>(res_column.get()); |
3831 | 1 | auto args_null_map = ColumnUInt8::create(input_rows_count, 0); |
3832 | 1 | ColumnPtr argument_columns[4]; |
3833 | 1 | bool col_const[4]; |
3834 | 5 | for (int i = 0; i < 4; ++i) { |
3835 | 4 | std::tie(argument_columns[i], col_const[i]) = |
3836 | 4 | unpack_if_const(block.get_by_position(arguments[i]).column); |
3837 | 4 | } |
3838 | 1 | const auto* data_column = assert_cast<const ColumnString*>(argument_columns[0].get()); |
3839 | 1 | const auto* mask_column = assert_cast<const ColumnString*>(argument_columns[1].get()); |
3840 | 1 | const auto* start_column = assert_cast<const ColumnInt32*>(argument_columns[2].get()); |
3841 | 1 | const auto* length_column = assert_cast<const ColumnInt32*>(argument_columns[3].get()); |
3842 | | |
3843 | 1 | std::visit( |
3844 | 1 | [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) { |
3845 | 1 | if (data_column->is_ascii()) { |
3846 | 1 | vector_ascii<origin_str_const, new_str_const, start_const, len_const>( |
3847 | 1 | data_column, mask_column, start_column->get_data(), |
3848 | 1 | length_column->get_data(), args_null_map->get_data(), result_column, |
3849 | 1 | input_rows_count); |
3850 | 1 | } else { |
3851 | 0 | vector_utf8<origin_str_const, new_str_const, start_const, len_const>( |
3852 | 0 | data_column, mask_column, start_column->get_data(), |
3853 | 0 | length_column->get_data(), args_null_map->get_data(), result_column, |
3854 | 0 | input_rows_count); |
3855 | 0 | } |
3856 | 1 | }, _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_ Line | Count | Source | 3844 | 1 | [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) { | 3845 | 1 | if (data_column->is_ascii()) { | 3846 | 1 | vector_ascii<origin_str_const, new_str_const, start_const, len_const>( | 3847 | 1 | data_column, mask_column, start_column->get_data(), | 3848 | 1 | length_column->get_data(), args_null_map->get_data(), result_column, | 3849 | 1 | input_rows_count); | 3850 | 1 | } else { | 3851 | 0 | vector_utf8<origin_str_const, new_str_const, start_const, len_const>( | 3852 | 0 | data_column, mask_column, start_column->get_data(), | 3853 | 0 | length_column->get_data(), args_null_map->get_data(), result_column, | 3854 | 0 | input_rows_count); | 3855 | 0 | } | 3856 | 1 | }, |
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: 
_ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_ Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_ |
3857 | 1 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
3858 | 1 | make_bool_variant(col_const[2]), make_bool_variant(col_const[3])); |
3859 | 1 | block.get_by_position(result).column = |
3860 | 1 | ColumnNullable::create(std::move(res_column), std::move(args_null_map)); |
3861 | 1 | return Status::OK(); |
3862 | 1 | } |
3863 | | |
3864 | | private: |
3865 | | template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const> |
3866 | | static void vector_ascii(const ColumnString* data_column, const ColumnString* mask_column, |
3867 | | const PaddedPODArray<Int32>& args_start, |
3868 | | const PaddedPODArray<Int32>& args_length, NullMap& args_null_map, |
3869 | 1 | ColumnString* result_column, size_t input_rows_count) { |
3870 | 1 | ColumnString::Chars& res_chars = result_column->get_chars(); |
3871 | 1 | ColumnString::Offsets& res_offsets = result_column->get_offsets(); |
3872 | 10.2k | for (size_t row = 0; row < input_rows_count; ++row) { |
3873 | 10.2k | StringRef origin_str = |
3874 | 10.2k | data_column->get_data_at(index_check_const<origin_str_const>(row)); |
3875 | 10.2k | StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row)); |
3876 | 10.2k | const auto start = args_start[index_check_const<start_const>(row)]; |
3877 | 10.2k | const auto length = args_length[index_check_const<len_const>(row)]; |
3878 | 10.2k | const size_t origin_str_len = origin_str.size; |
3879 | | //input is null, start < 0, len < 0, str_size <= start. return NULL |
3880 | 10.2k | if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) { |
3881 | 10.2k | res_offsets.push_back(res_chars.size()); |
3882 | 10.2k | args_null_map[row] = 1; |
3883 | 10.2k | } else { |
3884 | 0 | std::string_view replace_str = new_str.to_string_view(); |
3885 | 0 | std::string result = origin_str.to_string(); |
3886 | 0 | result.replace(start, length, replace_str); |
3887 | 0 | result_column->insert_data(result.data(), result.length()); |
3888 | 0 | } |
3889 | 10.2k | } |
3890 | 1 | } _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Line | Count | Source | 3869 | 1 | ColumnString* result_column, size_t input_rows_count) { | 3870 | 1 | ColumnString::Chars& res_chars = result_column->get_chars(); | 3871 | 1 | ColumnString::Offsets& res_offsets = result_column->get_offsets(); | 3872 | 10.2k | for (size_t row = 0; row < input_rows_count; ++row) { | 3873 | 10.2k | StringRef origin_str = | 3874 | 10.2k | data_column->get_data_at(index_check_const<origin_str_const>(row)); | 3875 | 10.2k | StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row)); | 3876 | 10.2k | const auto start = args_start[index_check_const<start_const>(row)]; | 3877 | 10.2k | const auto length = args_length[index_check_const<len_const>(row)]; | 3878 | 10.2k | const size_t origin_str_len = origin_str.size; | 3879 | | //input is null, start < 0, len < 0, str_size <= start. return NULL | 3880 | 10.2k | if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) { | 3881 | 10.2k | res_offsets.push_back(res_chars.size()); | 3882 | 10.2k | args_null_map[row] = 1; | 3883 | 10.2k | } else { | 3884 | 0 | std::string_view replace_str = new_str.to_string_view(); | 3885 | 0 | std::string result = origin_str.to_string(); | 3886 | 0 | result.replace(start, length, replace_str); | 3887 | 0 | result_column->insert_data(result.data(), result.length()); | 3888 | 0 | } | 3889 | 10.2k | } | 3890 | 1 | } |
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: 
_ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m |
3891 | | |
3892 | | template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const> |
3893 | | static void vector_utf8(const ColumnString* data_column, const ColumnString* mask_column, |
3894 | | const PaddedPODArray<Int32>& args_start, |
3895 | | const PaddedPODArray<Int32>& args_length, NullMap& args_null_map, |
3896 | 0 | ColumnString* result_column, size_t input_rows_count) { |
3897 | 0 | ColumnString::Chars& res_chars = result_column->get_chars(); |
3898 | 0 | ColumnString::Offsets& res_offsets = result_column->get_offsets(); |
3899 | |
|
3900 | 0 | for (size_t row = 0; row < input_rows_count; ++row) { |
3901 | 0 | StringRef origin_str = |
3902 | 0 | data_column->get_data_at(index_check_const<origin_str_const>(row)); |
3903 | 0 | StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row)); |
3904 | 0 | const auto start = args_start[index_check_const<start_const>(row)]; |
3905 | 0 | const auto length = args_length[index_check_const<len_const>(row)]; |
3906 | | //input is null, start < 0, len < 0 return NULL |
3907 | 0 | if (args_null_map[row] || start < 0 || length < 0) { |
3908 | 0 | res_offsets.push_back(res_chars.size()); |
3909 | 0 | args_null_map[row] = 1; |
3910 | 0 | continue; |
3911 | 0 | } |
3912 | | |
3913 | 0 | const auto [start_byte_len, start_char_len] = |
3914 | 0 | simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(), |
3915 | 0 | origin_str.end(), start); |
3916 | | |
3917 | | // start >= orgin.size |
3918 | 0 | DCHECK(start_char_len <= start); |
3919 | 0 | if (start_byte_len == origin_str.size) { |
3920 | 0 | res_offsets.push_back(res_chars.size()); |
3921 | 0 | args_null_map[row] = 1; |
3922 | 0 | continue; |
3923 | 0 | } |
3924 | | |
3925 | 0 | auto [end_byte_len, end_char_len] = |
3926 | 0 | simd::VStringFunctions::iterate_utf8_with_limit_length( |
3927 | 0 | origin_str.begin() + start_byte_len, origin_str.end(), length); |
3928 | 0 | DCHECK(end_char_len <= length); |
3929 | 0 | std::string_view replace_str = new_str.to_string_view(); |
3930 | 0 | std::string result = origin_str.to_string(); |
3931 | 0 | result.replace(start_byte_len, end_byte_len, replace_str); |
3932 | 0 | result_column->insert_data(result.data(), result.length()); |
3933 | 0 | } |
3934 | 0 | } Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: 
_ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m |
3935 | | }; |
3936 | | |
3937 | | struct SubReplaceThreeImpl { |
3938 | 7 | static DataTypes get_variadic_argument_types() { |
3939 | 7 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
3940 | 7 | std::make_shared<DataTypeInt32>()}; |
3941 | 7 | } |
3942 | | |
3943 | | static Status execute_impl(FunctionContext* context, Block& block, |
3944 | | const ColumnNumbers& arguments, uint32_t result, |
3945 | 0 | size_t input_rows_count) { |
3946 | 0 | auto params = ColumnInt32::create(input_rows_count); |
3947 | 0 | auto& strlen_data = params->get_data(); |
3948 | |
|
3949 | 0 | auto str_col = |
3950 | 0 | block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); |
3951 | 0 | if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) { |
3952 | 0 | str_col = nullable->get_nested_column_ptr(); |
3953 | 0 | } |
3954 | 0 | const auto* str_column = assert_cast<const ColumnString*>(str_col.get()); |
3955 | | // use utf8 len |
3956 | 0 | for (int i = 0; i < input_rows_count; ++i) { |
3957 | 0 | StringRef str_ref = str_column->get_data_at(i); |
3958 | 0 | strlen_data[i] = simd::VStringFunctions::get_char_len(str_ref.data, str_ref.size); |
3959 | 0 | } |
3960 | |
|
3961 | 0 | block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"}); |
3962 | 0 | ColumnNumbers temp_arguments = {arguments[0], arguments[1], arguments[2], |
3963 | 0 | block.columns() - 1}; |
3964 | 0 | return SubReplaceImpl::replace_execute(block, temp_arguments, result, input_rows_count); |
3965 | 0 | } |
3966 | | }; |
3967 | | |
3968 | | struct SubReplaceFourImpl { |
3969 | 7 | static DataTypes get_variadic_argument_types() { |
3970 | 7 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
3971 | 7 | std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()}; |
3972 | 7 | } |
3973 | | |
3974 | | static Status execute_impl(FunctionContext* context, Block& block, |
3975 | | const ColumnNumbers& arguments, uint32_t result, |
3976 | 0 | size_t input_rows_count) { |
3977 | 0 | return SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count); |
3978 | 0 | } |
3979 | | }; |
3980 | | |
3981 | | class FunctionConvertTo : public IFunction { |
3982 | | public: |
3983 | | static constexpr auto name = "convert_to"; |
3984 | | |
3985 | 8 | static FunctionPtr create() { return std::make_shared<FunctionConvertTo>(); } |
3986 | | |
3987 | 1 | String get_name() const override { return name; } |
3988 | | |
3989 | 0 | size_t get_number_of_arguments() const override { return 2; } |
3990 | | |
3991 | 0 | DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override { |
3992 | 0 | return std::make_shared<DataTypeString>(); |
3993 | 0 | } |
3994 | | |
3995 | 0 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
3996 | 0 | if (scope != FunctionContext::THREAD_LOCAL) { |
3997 | 0 | return Status::OK(); |
3998 | 0 | } |
3999 | 0 | if (!context->is_col_constant(1)) { |
4000 | 0 | return Status::InvalidArgument( |
4001 | 0 | "character argument to convert function must be constant."); |
4002 | 0 | } |
4003 | 0 | const auto& character_data = context->get_constant_col(1)->column_ptr->get_data_at(0); |
4004 | 0 | if (!iequal(character_data.to_string(), "gbk")) { |
4005 | 0 | return Status::RuntimeError( |
4006 | 0 | "Illegal second argument column of function convert. now only support " |
4007 | 0 | "convert to character set of gbk"); |
4008 | 0 | } |
4009 | | |
4010 | 0 | return Status::OK(); |
4011 | 0 | } |
4012 | | |
4013 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
4014 | 0 | uint32_t result, size_t input_rows_count) const override { |
4015 | 0 | ColumnPtr argument_column = |
4016 | 0 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
4017 | 0 | const ColumnString* str_col = static_cast<const ColumnString*>(argument_column.get()); |
4018 | 0 | const auto& str_offset = str_col->get_offsets(); |
4019 | 0 | const auto& str_chars = str_col->get_chars(); |
4020 | 0 | auto col_res = ColumnString::create(); |
4021 | 0 | auto& res_offset = col_res->get_offsets(); |
4022 | 0 | auto& res_chars = col_res->get_chars(); |
4023 | 0 | res_offset.resize(input_rows_count); |
4024 | | // max pinyin size is 6 + 1 (first '~') for utf8 chinese word 3 |
4025 | 0 | size_t pinyin_size = (str_chars.size() + 2) / 3 * 7; |
4026 | 0 | ColumnString::check_chars_length(pinyin_size, 0); |
4027 | 0 | res_chars.resize(pinyin_size); |
4028 | |
|
4029 | 0 | size_t in_len = 0, out_len = 0; |
4030 | 0 | for (int i = 0; i < input_rows_count; ++i) { |
4031 | 0 | in_len = str_offset[i] - str_offset[i - 1]; |
4032 | 0 | const char* in = reinterpret_cast<const char*>(&str_chars[str_offset[i - 1]]); |
4033 | 0 | char* out = reinterpret_cast<char*>(&res_chars[res_offset[i - 1]]); |
4034 | 0 | _utf8_to_pinyin(in, in_len, out, &out_len); |
4035 | 0 | res_offset[i] = res_offset[i - 1] + out_len; |
4036 | 0 | } |
4037 | 0 | res_chars.resize(res_offset[input_rows_count - 1]); |
4038 | 0 | block.replace_by_position(result, std::move(col_res)); |
4039 | 0 | return Status::OK(); |
4040 | 0 | } |
4041 | | |
4042 | 0 | void _utf8_to_pinyin(const char* in, size_t in_len, char* out, size_t* out_len) const { |
4043 | 0 | auto do_memcpy = [](char*& dest, const char*& from, size_t size) { |
4044 | 0 | memcpy_small_allow_read_write_overflow15(dest, from, size); |
4045 | 0 | dest += size; |
4046 | 0 | from += size; |
4047 | 0 | }; |
4048 | 0 | auto from = in; |
4049 | 0 | auto dest = out; |
4050 | |
|
4051 | 0 | while (from - in < in_len) { |
4052 | 0 | auto length = get_utf8_byte_length(*from); |
4053 | 0 | if (length != 3) { |
4054 | 0 | do_memcpy(dest, from, length); |
4055 | 0 | } else { |
4056 | | // convert utf8 to unicode code to get pinyin offset |
4057 | 0 | if (auto tmp = (((int)(*from & 0x0F)) << 12) | (((int)(*(from + 1) & 0x3F)) << 6) | |
4058 | 0 | (*(from + 2) & 0x3F); |
4059 | 0 | tmp >= START_UNICODE_OFFSET and tmp < END_UNICODE_OFFSET) { |
4060 | 0 | const char* buf = nullptr; |
4061 | 0 | if (tmp >= START_UNICODE_OFFSET && tmp < MID_UNICODE_OFFSET) { |
4062 | 0 | buf = PINYIN_DICT1 + (tmp - START_UNICODE_OFFSET) * MAX_PINYIN_LEN; |
4063 | 0 | } else if (tmp >= MID_UNICODE_OFFSET && tmp < END_UNICODE_OFFSET) { |
4064 | 0 | buf = PINYIN_DICT2 + (tmp - MID_UNICODE_OFFSET) * MAX_PINYIN_LEN; |
4065 | 0 | } |
4066 | |
|
4067 | 0 | auto end = strchr(buf, ' '); |
4068 | | // max len for pinyin is 6 |
4069 | 0 | int len = MAX_PINYIN_LEN; |
4070 | 0 | if (end != nullptr && end - buf < MAX_PINYIN_LEN) { |
4071 | 0 | len = end - buf; |
4072 | 0 | } |
4073 | | // set first char '~' just make sure all english word lower than chinese word |
4074 | 0 | *dest = 126; |
4075 | 0 | memcpy(dest + 1, buf, len); |
4076 | 0 | dest += (len + 1); |
4077 | 0 | from += 3; |
4078 | 0 | } else { |
4079 | 0 | do_memcpy(dest, from, 3); |
4080 | 0 | } |
4081 | 0 | } |
4082 | 0 | } |
4083 | |
|
4084 | 0 | *out_len = dest - out; |
4085 | 0 | } |
4086 | | }; |
4087 | | |
4088 | | // refer to https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char |
4089 | | // UTF8 |
4090 | | // 多 0xe5, 0xa4, 0x9a 0xb6, 0xe0 |
4091 | | // 睿 0xe7, 0x9d, 0xbf 0xee, 0xa3 |
4092 | | // 丝 0xe4, 0xb8, 0x9d 0xcb, 0xbf 14989469 |
4093 | | // MySQL behaviour: |
4094 | | // mysql> select char(0xe4, 0xb8, 0x9d using utf8); |
4095 | | // +-----------------------------------+ |
4096 | | // | char(0xe4, 0xb8, 0x9d using utf8) | |
4097 | | // +-----------------------------------+ |
4098 | | // | 丝 | |
4099 | | // +-----------------------------------+ |
4100 | | // 1 row in set, 1 warning (0.00 sec) |
4101 | | // mysql> select char(14989469 using utf8); |
4102 | | // +---------------------------+ |
4103 | | // | char(14989469 using utf8) | |
4104 | | // +---------------------------+ |
4105 | | // | 丝 | |
4106 | | // +---------------------------+ |
4107 | | // 1 row in set, 1 warning (0.00 sec) |
4108 | | // mysql> select char(0xe5, 0xa4, 0x9a, 0xe7, 0x9d, 0xbf, 0xe4, 0xb8, 0x9d, 68, 111, 114, 105, 115 using utf8); |
4109 | | // +---------------------------------------------------------------------------------------------+ |
4110 | | // | char(0xe5, 0xa4, 0x9a, 0xe7, 0x9d, 0xbf, 0xe4, 0xb8, 0x9d, 68, 111, 114, 105, 115 using utf8) | |
4111 | | // +---------------------------------------------------------------------------------------------+ |
4112 | | // | 多睿丝 Doris | |
4113 | | // +---------------------------------------------------------------------------------------------+ |
4114 | | // mysql> select char(68, 111, 114, 0, 105, null, 115 using utf8); |
4115 | | // +--------------------------------------------------+ |
4116 | | // | char(68, 111, 114, 0, 105, null, 115 using utf8) | |
4117 | | // +--------------------------------------------------+ |
4118 | | // | Dor is | |
4119 | | // +--------------------------------------------------+ |
4120 | | |
4121 | | // return null: |
4122 | | // mysql> select char(255 using utf8); |
4123 | | // +----------------------+ |
4124 | | // | char(255 using utf8) | |
4125 | | // +----------------------+ |
4126 | | // | NULL | |
4127 | | // +----------------------+ |
4128 | | // 1 row in set, 2 warnings (0.00 sec) |
4129 | | // |
4130 | | // mysql> show warnings; |
4131 | | // +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
4132 | | // | Level | Code | Message | |
4133 | | // +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
4134 | | // | Warning | 3719 | 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. | |
4135 | | // | Warning | 1300 | Invalid utf8mb3 character string: 'FF' | |
4136 | | // +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
4137 | | // 2 rows in set (0.01 sec) |
4138 | | |
4139 | | // max int value: |
4140 | | // mysql> select char(18446744073709551615); |
4141 | | // +--------------------------------------------------------+ |
4142 | | // | char(18446744073709551615) | |
4143 | | // +--------------------------------------------------------+ |
4144 | | // | 0xFFFFFFFF | |
4145 | | // +--------------------------------------------------------+ |
4146 | | // 1 row in set (0.00 sec) |
4147 | | // |
4148 | | // mysql> select char(18446744073709551616); |
4149 | | // +--------------------------------------------------------+ |
4150 | | // | char(18446744073709551616) | |
4151 | | // +--------------------------------------------------------+ |
4152 | | // | 0xFFFFFFFF | |
4153 | | // +--------------------------------------------------------+ |
4154 | | // 1 row in set, 1 warning (0.00 sec) |
4155 | | // |
4156 | | // mysql> show warnings; |
4157 | | // +---------+------+-----------------------------------------------------------+ |
4158 | | // | Level | Code | Message | |
4159 | | // +---------+------+-----------------------------------------------------------+ |
4160 | | // | Warning | 1292 | Truncated incorrect DECIMAL value: '18446744073709551616' | |
4161 | | // +---------+------+-----------------------------------------------------------+ |
4162 | | // 1 row in set (0.00 sec) |
4163 | | |
4164 | | // table columns: |
4165 | | // mysql> select * from t; |
4166 | | // +------+------+------+ |
4167 | | // | f1 | f2 | f3 | |
4168 | | // +------+------+------+ |
4169 | | // | 228 | 184 | 157 | |
4170 | | // | 228 | 184 | 0 | |
4171 | | // | 228 | 184 | 99 | |
4172 | | // | 99 | 228 | 184 | |
4173 | | // +------+------+------+ |
4174 | | // 4 rows in set (0.00 sec) |
4175 | | // |
4176 | | // mysql> select char(f1, f2, f3 using utf8) from t; |
4177 | | // +-----------------------------+ |
4178 | | // | char(f1, f2, f3 using utf8) | |
4179 | | // +-----------------------------+ |
4180 | | // | 丝 | |
4181 | | // | | |
4182 | | // | | |
4183 | | // | c | |
4184 | | // +-----------------------------+ |
4185 | | // 4 rows in set, 4 warnings (0.00 sec) |
4186 | | // |
4187 | | // mysql> show warnings; |
4188 | | // +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
4189 | | // | Level | Code | Message | |
4190 | | // +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
4191 | | // | Warning | 3719 | 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. | |
4192 | | // | Warning | 1300 | Invalid utf8mb3 character string: 'E4B800' | |
4193 | | // | Warning | 1300 | Invalid utf8mb3 character string: 'E4B863' | |
4194 | | // | Warning | 1300 | Invalid utf8mb3 character string: 'E4B8' | |
4195 | | // +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ |
4196 | | class FunctionIntToChar : public IFunction { |
4197 | | public: |
4198 | | static constexpr auto name = "char"; |
4199 | 8 | static FunctionPtr create() { return std::make_shared<FunctionIntToChar>(); } |
4200 | 0 | String get_name() const override { return name; } |
4201 | 0 | size_t get_number_of_arguments() const override { return 0; } |
4202 | 1 | bool is_variadic() const override { return true; } |
4203 | | |
4204 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
4205 | 0 | return make_nullable(std::make_shared<DataTypeString>()); |
4206 | 0 | } |
4207 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
4208 | | |
4209 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
4210 | 0 | uint32_t result, size_t input_rows_count) const override { |
4211 | 0 | DCHECK_GE(arguments.size(), 2); |
4212 | |
|
4213 | 0 | int argument_size = arguments.size(); |
4214 | 0 | std::vector<ColumnPtr> str_columns(argument_size - 1); |
4215 | 0 | std::vector<const ColumnString::Offsets*> offsets_list(argument_size - 1); |
4216 | 0 | std::vector<const ColumnString::Chars*> chars_list(argument_size - 1); |
4217 | | |
4218 | | // convert each argument columns to column string and then concat the string columns |
4219 | 0 | for (size_t i = 1; i < argument_size; ++i) { |
4220 | 0 | if (auto const_column = check_and_get_column<const ColumnConst>( |
4221 | 0 | *block.get_by_position(arguments[i]).column)) { |
4222 | | // ignore null |
4223 | 0 | if (const_column->only_null()) { |
4224 | 0 | str_columns[i - 1] = nullptr; |
4225 | 0 | } else { |
4226 | 0 | auto str_column = ColumnString::create(); |
4227 | 0 | auto& chars = str_column->get_chars(); |
4228 | 0 | auto& offsets = str_column->get_offsets(); |
4229 | 0 | offsets.resize(1); |
4230 | 0 | const ColumnInt32* int_column; |
4231 | 0 | if (auto* nullable = check_and_get_column<const ColumnNullable>( |
4232 | 0 | const_column->get_data_column())) { |
4233 | 0 | int_column = assert_cast<const ColumnInt32*>( |
4234 | 0 | nullable->get_nested_column_ptr().get()); |
4235 | 0 | } else { |
4236 | 0 | int_column = |
4237 | 0 | assert_cast<const ColumnInt32*>(&const_column->get_data_column()); |
4238 | 0 | } |
4239 | 0 | int int_val = int_column->get_int(0); |
4240 | 0 | integer_to_char_(0, &int_val, chars, offsets); |
4241 | 0 | str_columns[i - 1] = |
4242 | 0 | ColumnConst::create(std::move(str_column), input_rows_count); |
4243 | 0 | } |
4244 | 0 | offsets_list[i - 1] = nullptr; |
4245 | 0 | chars_list[i - 1] = nullptr; |
4246 | 0 | } else { |
4247 | 0 | auto str_column = ColumnString::create(); |
4248 | 0 | auto& chars = str_column->get_chars(); |
4249 | 0 | auto& offsets = str_column->get_offsets(); |
4250 | | // data.resize(input_rows_count); |
4251 | 0 | offsets.resize(input_rows_count); |
4252 | |
|
4253 | 0 | if (auto nullable = check_and_get_column<const ColumnNullable>( |
4254 | 0 | *block.get_by_position(arguments[i]).column)) { |
4255 | 0 | const auto* int_data = |
4256 | 0 | assert_cast<const ColumnInt32*>(nullable->get_nested_column_ptr().get()) |
4257 | 0 | ->get_data() |
4258 | 0 | .data(); |
4259 | 0 | const auto* null_map_data = nullable->get_null_map_data().data(); |
4260 | 0 | for (size_t j = 0; j < input_rows_count; ++j) { |
4261 | | // ignore null |
4262 | 0 | if (null_map_data[j]) { |
4263 | 0 | offsets[j] = offsets[j - 1]; |
4264 | 0 | } else { |
4265 | 0 | integer_to_char_(j, int_data + j, chars, offsets); |
4266 | 0 | } |
4267 | 0 | } |
4268 | 0 | } else { |
4269 | 0 | const auto* int_data = assert_cast<const ColumnInt32*>( |
4270 | 0 | block.get_by_position(arguments[i]).column.get()) |
4271 | 0 | ->get_data() |
4272 | 0 | .data(); |
4273 | 0 | for (size_t j = 0; j < input_rows_count; ++j) { |
4274 | 0 | integer_to_char_(j, int_data + j, chars, offsets); |
4275 | 0 | } |
4276 | 0 | } |
4277 | 0 | offsets_list[i - 1] = &str_column->get_offsets(); |
4278 | 0 | chars_list[i - 1] = &str_column->get_chars(); |
4279 | 0 | str_columns[i - 1] = std::move(str_column); |
4280 | 0 | } |
4281 | 0 | } |
4282 | |
|
4283 | 0 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
4284 | 0 | auto res = ColumnString::create(); |
4285 | 0 | auto& res_data = res->get_chars(); |
4286 | 0 | auto& res_offset = res->get_offsets(); |
4287 | |
|
4288 | 0 | size_t res_reserve_size = 0; |
4289 | 0 | for (size_t i = 0; i < argument_size - 1; ++i) { |
4290 | 0 | if (!str_columns[i]) { |
4291 | 0 | continue; |
4292 | 0 | } |
4293 | 0 | if (auto const_column = check_and_get_column<const ColumnConst>(*str_columns[i])) { |
4294 | 0 | auto str_column = |
4295 | 0 | assert_cast<const ColumnString*>(&(const_column->get_data_column())); |
4296 | 0 | auto& offsets = str_column->get_offsets(); |
4297 | 0 | res_reserve_size += (offsets[0] - offsets[-1]) * input_rows_count; |
4298 | 0 | } else { |
4299 | 0 | for (size_t j = 0; j < input_rows_count; ++j) { |
4300 | 0 | size_t append = (*offsets_list[i])[j] - (*offsets_list[i])[j - 1]; |
4301 | | // check whether the output might overflow(unlikely) |
4302 | 0 | if (UNLIKELY(UINT_MAX - append < res_reserve_size)) { |
4303 | 0 | return Status::BufferAllocFailed( |
4304 | 0 | "function char output is too large to allocate"); |
4305 | 0 | } |
4306 | 0 | res_reserve_size += append; |
4307 | 0 | } |
4308 | 0 | } |
4309 | 0 | } |
4310 | 0 | if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) { |
4311 | 0 | return Status::BufferAllocFailed("function char output is too large to allocate"); |
4312 | 0 | } |
4313 | 0 | ColumnString::check_chars_length(res_reserve_size, 0); |
4314 | 0 | res_data.resize(res_reserve_size); |
4315 | 0 | res_offset.resize(input_rows_count); |
4316 | |
|
4317 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
4318 | 0 | int current_length = 0; |
4319 | 0 | for (size_t j = 0; j < argument_size - 1; ++j) { |
4320 | 0 | if (!str_columns[j]) { |
4321 | 0 | continue; |
4322 | 0 | } |
4323 | 0 | if (auto const_column = check_and_get_column<const ColumnConst>(*str_columns[j])) { |
4324 | 0 | auto str_column = assert_cast<const ColumnString*, TypeCheckOnRelease::DISABLE>( |
4325 | 0 | &(const_column->get_data_column())); |
4326 | 0 | auto data_item = str_column->get_data_at(0); |
4327 | 0 | memcpy_small_allow_read_write_overflow15( |
4328 | 0 | &res_data[res_offset[i - 1]] + current_length, data_item.data, |
4329 | 0 | data_item.size); |
4330 | 0 | current_length += data_item.size; |
4331 | 0 | } else { |
4332 | 0 | auto& current_offsets = *offsets_list[j]; |
4333 | 0 | auto& current_chars = *chars_list[j]; |
4334 | |
|
4335 | 0 | int size = current_offsets[i] - current_offsets[i - 1]; |
4336 | 0 | if (size > 0) { |
4337 | 0 | memcpy_small_allow_read_write_overflow15( |
4338 | 0 | &res_data[res_offset[i - 1]] + current_length, |
4339 | 0 | ¤t_chars[current_offsets[i - 1]], size); |
4340 | 0 | current_length += size; |
4341 | 0 | } |
4342 | 0 | } |
4343 | 0 | } |
4344 | 0 | res_offset[i] = res_offset[i - 1] + current_length; |
4345 | 0 | } |
4346 | | |
4347 | | // validate utf8 |
4348 | 0 | auto* null_map_data = null_map->get_data().data(); |
4349 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
4350 | 0 | if (!validate_utf8((const char*)(&res_data[res_offset[i - 1]]), |
4351 | 0 | res_offset[i] - res_offset[i - 1])) { |
4352 | 0 | null_map_data[i] = 1; |
4353 | 0 | } |
4354 | 0 | } |
4355 | |
|
4356 | 0 | block.get_by_position(result).column = |
4357 | 0 | ColumnNullable::create(std::move(res), std::move(null_map)); |
4358 | 0 | return Status::OK(); |
4359 | 0 | } |
4360 | | |
4361 | | private: |
4362 | | void integer_to_char_(int line_num, const int* num, ColumnString::Chars& chars, |
4363 | 0 | IColumn::Offsets& offsets) const { |
4364 | 0 | if (0 == *num) { |
4365 | 0 | chars.push_back('\0'); |
4366 | 0 | offsets[line_num] = offsets[line_num - 1] + 1; |
4367 | 0 | return; |
4368 | 0 | } |
4369 | 0 | const char* bytes = (const char*)(num); |
4370 | 0 | if constexpr (std::endian::native == std::endian::little) { |
4371 | 0 | int k = 3; |
4372 | 0 | for (; k >= 0; --k) { |
4373 | 0 | if (bytes[k]) { |
4374 | 0 | break; |
4375 | 0 | } |
4376 | 0 | } |
4377 | 0 | offsets[line_num] = offsets[line_num - 1] + k + 1; |
4378 | 0 | for (; k >= 0; --k) { |
4379 | 0 | chars.push_back(bytes[k] ? bytes[k] : '\0'); |
4380 | 0 | } |
4381 | | } else if constexpr (std::endian::native == std::endian::big) { |
4382 | | int k = 0; |
4383 | | for (; k < 4; ++k) { |
4384 | | if (bytes[k]) { |
4385 | | break; |
4386 | | } |
4387 | | } |
4388 | | offsets[line_num] = offsets[line_num - 1] + 4 - k; |
4389 | | for (; k < 4; ++k) { |
4390 | | chars.push_back(bytes[k] ? bytes[k] : '\0'); |
4391 | | } |
4392 | | } else { |
4393 | | static_assert(std::endian::native == std::endian::big || |
4394 | | std::endian::native == std::endian::little, |
4395 | | "Unsupported endianness"); |
4396 | | } |
4397 | 0 | } |
4398 | | }; |
4399 | | |
4400 | | class FunctionOverlay : public IFunction { |
4401 | | public: |
4402 | | static constexpr auto name = "overlay"; |
4403 | 25 | static FunctionPtr create() { return std::make_shared<FunctionOverlay>(); } |
4404 | 1 | String get_name() const override { return name; } |
4405 | 17 | size_t get_number_of_arguments() const override { return 4; } |
4406 | | |
4407 | 17 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
4408 | 17 | return std::make_shared<DataTypeString>(); |
4409 | 17 | } |
4410 | | |
4411 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
4412 | 13 | uint32_t result, size_t input_rows_count) const override { |
4413 | 13 | DCHECK_EQ(arguments.size(), 4); |
4414 | | |
4415 | 13 | bool col_const[4]; |
4416 | 13 | ColumnPtr argument_columns[4]; |
4417 | 65 | for (int i = 0; i < 4; ++i) { |
4418 | 52 | std::tie(argument_columns[i], col_const[i]) = |
4419 | 52 | unpack_if_const(block.get_by_position(arguments[i]).column); |
4420 | 52 | } |
4421 | | |
4422 | 13 | const auto* col_origin = assert_cast<const ColumnString*>(argument_columns[0].get()); |
4423 | | |
4424 | 13 | const auto* col_pos = |
4425 | 13 | assert_cast<const ColumnInt32*>(argument_columns[1].get())->get_data().data(); |
4426 | 13 | const auto* col_len = |
4427 | 13 | assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_data().data(); |
4428 | 13 | const auto* col_insert = assert_cast<const ColumnString*>(argument_columns[3].get()); |
4429 | | |
4430 | 13 | ColumnString::MutablePtr col_res = ColumnString::create(); |
4431 | | |
4432 | | // if all input string is ascii, we can use ascii function to handle it |
4433 | 13 | const bool is_all_ascii = col_origin->is_ascii() && col_insert->is_ascii(); |
4434 | 13 | std::visit( |
4435 | 13 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { |
4436 | 13 | if (is_all_ascii) { |
4437 | 6 | vector_ascii<origin_const, pos_const, len_const, insert_const>( |
4438 | 6 | col_origin, col_pos, col_len, col_insert, col_res, |
4439 | 6 | input_rows_count); |
4440 | 7 | } else { |
4441 | 7 | vector_utf8<origin_const, pos_const, len_const, insert_const>( |
4442 | 7 | col_origin, col_pos, col_len, col_insert, col_res, |
4443 | 7 | input_rows_count); |
4444 | 7 | } |
4445 | 13 | }, _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SH_EEDaSA_SB_SC_SD_ Line | Count | Source | 4435 | 1 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 4436 | 1 | if (is_all_ascii) { | 4437 | 0 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 4438 | 0 | col_origin, col_pos, col_len, col_insert, col_res, | 4439 | 0 | input_rows_count); | 4440 | 1 | } else { | 4441 | 1 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 4442 | 1 | col_origin, col_pos, col_len, col_insert, col_res, | 4443 | 1 | input_rows_count); | 4444 | 1 | } | 4445 | 1 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SG_IbLb1EEEEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESH_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESI_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SH_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SI_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SH_EEDaSA_SB_SC_SD_ _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SI_EEDaSA_SB_SC_SD_ Line | Count | Source | 4435 | 12 | [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) { | 4436 | 12 | if (is_all_ascii) { | 4437 | 6 | vector_ascii<origin_const, pos_const, len_const, insert_const>( | 4438 | 6 | col_origin, col_pos, col_len, col_insert, col_res, | 4439 | 6 | input_rows_count); | 4440 | 6 | } else { | 4441 | 6 | vector_utf8<origin_const, pos_const, len_const, insert_const>( | 4442 | 6 | col_origin, col_pos, col_len, col_insert, col_res, | 4443 | 6 | input_rows_count); | 4444 | 6 | } | 4445 | 12 | }, |
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SI_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SH_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SI_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SH_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESI_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESH_EEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SG_IbLb0EEEEDaSA_SB_SC_SD_ Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SH_EEDaSA_SB_SC_SD_ |
4446 | 13 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
4447 | 13 | make_bool_variant(col_const[2]), make_bool_variant(col_const[3])); |
4448 | 13 | block.replace_by_position(result, std::move(col_res)); |
4449 | 13 | return Status::OK(); |
4450 | 13 | } |
4451 | | |
4452 | | private: |
4453 | | template <bool origin_const, bool pos_const, bool len_const, bool insert_const> |
4454 | | static void vector_ascii(const ColumnString* col_origin, int const* col_pos, int const* col_len, |
4455 | | const ColumnString* col_insert, ColumnString::MutablePtr& col_res, |
4456 | 6 | size_t input_rows_count) { |
4457 | 6 | auto& col_res_chars = col_res->get_chars(); |
4458 | 6 | auto& col_res_offsets = col_res->get_offsets(); |
4459 | 6 | StringRef origin_str, insert_str; |
4460 | 12 | for (size_t i = 0; i < input_rows_count; i++) { |
4461 | 6 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); |
4462 | | // pos is 1-based index,so we need to minus 1 |
4463 | 6 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; |
4464 | 6 | const auto len = col_len[index_check_const<len_const>(i)]; |
4465 | 6 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); |
4466 | 6 | const auto origin_size = origin_str.size; |
4467 | 6 | if (pos >= origin_size || pos < 0) { |
4468 | | // If pos is not within the length of the string, the original string is returned. |
4469 | 3 | col_res->insert_data(origin_str.data, origin_str.size); |
4470 | 3 | continue; |
4471 | 3 | } |
4472 | 3 | col_res_chars.insert(origin_str.data, |
4473 | 3 | origin_str.data + pos); // copy origin_str with index 0 to pos - 1 |
4474 | 3 | if (pos + len > origin_size || len < 0) { |
4475 | 1 | col_res_chars.insert(insert_str.begin(), |
4476 | 1 | insert_str.end()); // copy all of insert_str. |
4477 | 2 | } else { |
4478 | 2 | col_res_chars.insert(insert_str.begin(), |
4479 | 2 | insert_str.end()); // copy all of insert_str. |
4480 | 2 | col_res_chars.insert( |
4481 | 2 | origin_str.data + pos + len, |
4482 | 2 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. |
4483 | 2 | } |
4484 | 3 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); |
4485 | 3 | col_res_offsets.push_back(col_res_chars.size()); |
4486 | 3 | } |
4487 | 6 | } Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 4456 | 6 | size_t input_rows_count) { | 4457 | 6 | auto& col_res_chars = col_res->get_chars(); | 4458 | 6 | auto& col_res_offsets = col_res->get_offsets(); | 4459 | 6 | StringRef origin_str, insert_str; | 4460 | 12 | for (size_t i = 0; i < input_rows_count; i++) { | 4461 | 6 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 4462 | | // pos is 1-based index,so we need to minus 1 | 4463 | 6 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 4464 | 6 | const auto len = col_len[index_check_const<len_const>(i)]; | 4465 | 6 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 4466 | 6 | const auto origin_size = origin_str.size; | 4467 | 6 | if (pos >= 
origin_size || pos < 0) { | 4468 | | // If pos is not within the length of the string, the original string is returned. | 4469 | 3 | col_res->insert_data(origin_str.data, origin_str.size); | 4470 | 3 | continue; | 4471 | 3 | } | 4472 | 3 | col_res_chars.insert(origin_str.data, | 4473 | 3 | origin_str.data + pos); // copy origin_str with index 0 to pos - 1 | 4474 | 3 | if (pos + len > origin_size || len < 0) { | 4475 | 1 | col_res_chars.insert(insert_str.begin(), | 4476 | 1 | insert_str.end()); // copy all of insert_str. | 4477 | 2 | } else { | 4478 | 2 | col_res_chars.insert(insert_str.begin(), | 4479 | 2 | insert_str.end()); // copy all of insert_str. | 4480 | 2 | col_res_chars.insert( | 4481 | 2 | origin_str.data + pos + len, | 4482 | 2 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 4483 | 2 | } | 4484 | 3 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 4485 | 3 | col_res_offsets.push_back(col_res_chars.size()); | 4486 | 3 | } | 4487 | 6 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm |
4488 | | |
4489 | | template <bool origin_const, bool pos_const, bool len_const, bool insert_const> |
4490 | | NO_SANITIZE_UNDEFINED static void vector_utf8(const ColumnString* col_origin, |
4491 | | int const* col_pos, int const* col_len, |
4492 | | const ColumnString* col_insert, |
4493 | | ColumnString::MutablePtr& col_res, |
4494 | 7 | size_t input_rows_count) { |
4495 | 7 | auto& col_res_chars = col_res->get_chars(); |
4496 | 7 | auto& col_res_offsets = col_res->get_offsets(); |
4497 | 7 | StringRef origin_str, insert_str; |
4498 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. |
4499 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. |
4500 | 7 | std::vector<size_t> utf8_origin_offsets; |
4501 | 29 | for (size_t i = 0; i < input_rows_count; i++) { |
4502 | 22 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); |
4503 | | // pos is 1-based index,so we need to minus 1 |
4504 | 22 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; |
4505 | 22 | const auto len = col_len[index_check_const<len_const>(i)]; |
4506 | 22 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); |
4507 | 22 | utf8_origin_offsets.clear(); |
4508 | | |
4509 | 160 | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { |
4510 | 138 | utf8_origin_offsets.push_back(ni); |
4511 | 138 | char_size = get_utf8_byte_length(origin_str.data[ni]); |
4512 | 138 | } |
4513 | | |
4514 | 22 | const size_t utf8_origin_size = utf8_origin_offsets.size(); |
4515 | | |
4516 | 22 | if (pos >= utf8_origin_size || pos < 0) { |
4517 | | // If pos is not within the length of the string, the original string is returned. |
4518 | 13 | col_res->insert_data(origin_str.data, origin_str.size); |
4519 | 13 | continue; |
4520 | 13 | } |
4521 | 9 | col_res_chars.insert( |
4522 | 9 | origin_str.data, |
4523 | 9 | origin_str.data + |
4524 | 9 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 |
4525 | 9 | if (pos + len >= utf8_origin_size || len < 0) { |
4526 | 4 | col_res_chars.insert(insert_str.begin(), |
4527 | 4 | insert_str.end()); // copy all of insert_str. |
4528 | 5 | } else { |
4529 | 5 | col_res_chars.insert(insert_str.begin(), |
4530 | 5 | insert_str.end()); // copy all of insert_str. |
4531 | 5 | col_res_chars.insert( |
4532 | 5 | origin_str.data + utf8_origin_offsets[pos + len], |
4533 | 5 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. |
4534 | 5 | } |
4535 | 9 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); |
4536 | 9 | col_res_offsets.push_back(col_res_chars.size()); |
4537 | 9 | } |
4538 | 7 | } _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 4494 | 1 | size_t input_rows_count) { | 4495 | 1 | auto& col_res_chars = col_res->get_chars(); | 4496 | 1 | auto& col_res_offsets = col_res->get_offsets(); | 4497 | 1 | StringRef origin_str, insert_str; | 4498 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. | 4499 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. | 4500 | 1 | std::vector<size_t> utf8_origin_offsets; | 4501 | 17 | for (size_t i = 0; i < input_rows_count; i++) { | 4502 | 16 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 4503 | | // pos is 1-based index,so we need to minus 1 | 4504 | 16 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 4505 | 16 | const auto len = col_len[index_check_const<len_const>(i)]; | 4506 | 16 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 4507 | 16 | utf8_origin_offsets.clear(); | 4508 | | | 4509 | 116 | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { | 4510 | 100 | utf8_origin_offsets.push_back(ni); | 4511 | 100 | char_size = get_utf8_byte_length(origin_str.data[ni]); | 4512 | 100 | } | 4513 | | | 4514 | 16 | const size_t utf8_origin_size = utf8_origin_offsets.size(); | 4515 | | | 4516 | 16 | if (pos >= utf8_origin_size || pos < 0) { | 4517 | | // If pos is not within the length of the string, the original string is returned. 
| 4518 | 10 | col_res->insert_data(origin_str.data, origin_str.size); | 4519 | 10 | continue; | 4520 | 10 | } | 4521 | 6 | col_res_chars.insert( | 4522 | 6 | origin_str.data, | 4523 | 6 | origin_str.data + | 4524 | 6 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 | 4525 | 6 | if (pos + len >= utf8_origin_size || len < 0) { | 4526 | 3 | col_res_chars.insert(insert_str.begin(), | 4527 | 3 | insert_str.end()); // copy all of insert_str. | 4528 | 3 | } else { | 4529 | 3 | col_res_chars.insert(insert_str.begin(), | 4530 | 3 | insert_str.end()); // copy all of insert_str. | 4531 | 3 | col_res_chars.insert( | 4532 | 3 | origin_str.data + utf8_origin_offsets[pos + len], | 4533 | 3 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 4534 | 3 | } | 4535 | 6 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 4536 | 6 | col_res_offsets.push_back(col_res_chars.size()); | 4537 | 6 | } | 4538 | 1 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Line | Count | Source | 4494 | 6 | size_t input_rows_count) { | 4495 | 6 | auto& col_res_chars = col_res->get_chars(); | 4496 | 6 | auto& col_res_offsets = col_res->get_offsets(); | 4497 | 6 | StringRef origin_str, insert_str; | 4498 | | // utf8_origin_offsets is used to store the offset of each utf8 character in the original string. | 4499 | | // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}. 
| 4500 | 6 | std::vector<size_t> utf8_origin_offsets; | 4501 | 12 | for (size_t i = 0; i < input_rows_count; i++) { | 4502 | 6 | origin_str = col_origin->get_data_at(index_check_const<origin_const>(i)); | 4503 | | // pos is 1-based index,so we need to minus 1 | 4504 | 6 | const auto pos = col_pos[index_check_const<pos_const>(i)] - 1; | 4505 | 6 | const auto len = col_len[index_check_const<len_const>(i)]; | 4506 | 6 | insert_str = col_insert->get_data_at(index_check_const<insert_const>(i)); | 4507 | 6 | utf8_origin_offsets.clear(); | 4508 | | | 4509 | 44 | for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) { | 4510 | 38 | utf8_origin_offsets.push_back(ni); | 4511 | 38 | char_size = get_utf8_byte_length(origin_str.data[ni]); | 4512 | 38 | } | 4513 | | | 4514 | 6 | const size_t utf8_origin_size = utf8_origin_offsets.size(); | 4515 | | | 4516 | 6 | if (pos >= utf8_origin_size || pos < 0) { | 4517 | | // If pos is not within the length of the string, the original string is returned. | 4518 | 3 | col_res->insert_data(origin_str.data, origin_str.size); | 4519 | 3 | continue; | 4520 | 3 | } | 4521 | 3 | col_res_chars.insert( | 4522 | 3 | origin_str.data, | 4523 | 3 | origin_str.data + | 4524 | 3 | utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1 | 4525 | 3 | if (pos + len >= utf8_origin_size || len < 0) { | 4526 | 1 | col_res_chars.insert(insert_str.begin(), | 4527 | 1 | insert_str.end()); // copy all of insert_str. | 4528 | 2 | } else { | 4529 | 2 | col_res_chars.insert(insert_str.begin(), | 4530 | 2 | insert_str.end()); // copy all of insert_str. | 4531 | 2 | col_res_chars.insert( | 4532 | 2 | origin_str.data + utf8_origin_offsets[pos + len], | 4533 | 2 | origin_str.end()); // copy origin_str from pos+len-1 to the end of the line. | 4534 | 2 | } | 4535 | 3 | ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size()); | 4536 | 3 | col_res_offsets.push_back(col_res_chars.size()); | 4537 | 3 | } | 4538 | 6 | } |
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm |
4539 | | }; |
4540 | | |
4541 | | class FunctionNgramSearch : public IFunction { |
4542 | | public: |
4543 | | static constexpr auto name = "ngram_search"; |
4544 | 8 | static FunctionPtr create() { return std::make_shared<FunctionNgramSearch>(); } |
4545 | 1 | String get_name() const override { return name; } |
4546 | 0 | size_t get_number_of_arguments() const override { return 3; } |
4547 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
4548 | 0 | return std::make_shared<DataTypeFloat64>(); |
4549 | 0 | } |
4550 | | |
4551 | | // ngram_search(text,pattern,gram_num) |
4552 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
4553 | 0 | uint32_t result, size_t input_rows_count) const override { |
4554 | 0 | CHECK_EQ(arguments.size(), 3); |
4555 | 0 | auto col_res = ColumnFloat64::create(); |
4556 | 0 | bool col_const[3]; |
4557 | 0 | ColumnPtr argument_columns[3]; |
4558 | 0 | for (int i = 0; i < 3; ++i) { |
4559 | 0 | std::tie(argument_columns[i], col_const[i]) = |
4560 | 0 | unpack_if_const(block.get_by_position(arguments[i]).column); |
4561 | 0 | } |
4562 | | // There is no need to check if the 2-th,3-th parameters are const here because fe has already checked them. |
4563 | 0 | auto pattern = assert_cast<const ColumnString*>(argument_columns[1].get())->get_data_at(0); |
4564 | 0 | auto gram_num = assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_element(0); |
4565 | 0 | const auto* text_col = assert_cast<const ColumnString*>(argument_columns[0].get()); |
4566 | |
|
4567 | 0 | if (col_const[0]) { |
4568 | 0 | _execute_impl<true>(text_col, pattern, gram_num, *col_res, input_rows_count); |
4569 | 0 | } else { |
4570 | 0 | _execute_impl<false>(text_col, pattern, gram_num, *col_res, input_rows_count); |
4571 | 0 | } |
4572 | |
|
4573 | 0 | block.replace_by_position(result, std::move(col_res)); |
4574 | 0 | return Status::OK(); |
4575 | 0 | } |
4576 | | |
4577 | | private: |
4578 | | using NgramMap = phmap::flat_hash_map<uint32_t, uint8_t>; |
4579 | | // In the map, the key is the CRC32 hash result of a substring in the string, |
4580 | | // and the value indicates whether this hash is found in the text or pattern. |
4581 | | constexpr static auto not_found = 0b00; |
4582 | | constexpr static auto found_in_pattern = 0b01; |
4583 | | constexpr static auto found_in_text = 0b10; |
4584 | | constexpr static auto found_in_pattern_and_text = 0b11; |
4585 | | |
4586 | 0 | uint32_t sub_str_hash(const char* data, int32_t length) const { |
4587 | 0 | constexpr static uint32_t seed = 0; |
4588 | 0 | return crc32c::Extend(seed, (const uint8_t*)data, length); |
4589 | 0 | } |
4590 | | |
4591 | | template <bool column_const> |
4592 | | void _execute_impl(const ColumnString* text_col, StringRef& pattern, int gram_num, |
4593 | 0 | ColumnFloat64& res, size_t size) const { |
4594 | 0 | auto& res_data = res.get_data(); |
4595 | 0 | res_data.resize_fill(size, 0); |
4596 | | // If the length of the pattern is less than gram_num, return 0. |
4597 | 0 | if (pattern.size < gram_num) { |
4598 | 0 | return; |
4599 | 0 | } |
4600 | | |
4601 | | // Build a map by pattern string, which will be used repeatedly in the following loop. |
4602 | 0 | NgramMap pattern_map; |
4603 | 0 | int pattern_count = get_pattern_set(pattern_map, pattern, gram_num); |
4604 | | // Each time a loop is executed, the map will be modified, so it needs to be restored afterward. |
4605 | 0 | std::vector<uint32_t> restore_map; |
4606 | |
|
4607 | 0 | for (int i = 0; i < size; i++) { |
4608 | 0 | auto text = text_col->get_data_at(index_check_const<column_const>(i)); |
4609 | 0 | if (text.size < gram_num) { |
4610 | | // If the length of the text is less than gram_num, return 0. |
4611 | 0 | continue; |
4612 | 0 | } |
4613 | 0 | restore_map.reserve(text.size); |
4614 | 0 | auto [text_count, intersection_count] = |
4615 | 0 | get_text_set(text, gram_num, pattern_map, restore_map); |
4616 | | |
4617 | | // 2 * |Intersection| / (|text substr set| + |pattern substr set|) |
4618 | 0 | res_data[i] = 2.0 * intersection_count / (text_count + pattern_count); |
4619 | 0 | } |
4620 | 0 | } Unexecuted instantiation: _ZNK5doris19FunctionNgramSearch13_execute_implILb1EEEvPKNS_9ColumnStrIjEERNS_9StringRefEiRNS_12ColumnVectorILNS_13PrimitiveTypeE9EEEm Unexecuted instantiation: _ZNK5doris19FunctionNgramSearch13_execute_implILb0EEEvPKNS_9ColumnStrIjEERNS_9StringRefEiRNS_12ColumnVectorILNS_13PrimitiveTypeE9EEEm |
4621 | | |
4622 | 0 | size_t get_pattern_set(NgramMap& pattern_map, StringRef& pattern, int gram_num) const { |
4623 | 0 | size_t pattern_count = 0; |
4624 | 0 | for (int i = 0; i + gram_num <= pattern.size; i++) { |
4625 | 0 | uint32_t cur_hash = sub_str_hash(pattern.data + i, gram_num); |
4626 | 0 | if (!pattern_map.contains(cur_hash)) { |
4627 | 0 | pattern_map[cur_hash] = found_in_pattern; |
4628 | 0 | pattern_count++; |
4629 | 0 | } |
4630 | 0 | } |
4631 | 0 | return pattern_count; |
4632 | 0 | } |
4633 | | |
4634 | | std::pair<size_t, size_t> get_text_set(StringRef& text, int gram_num, NgramMap& pattern_map, |
4635 | 0 | std::vector<uint32_t>& restore_map) const { |
4636 | 0 | restore_map.clear(); |
4637 | | //intersection_count indicates a substring both in pattern and text. |
4638 | 0 | size_t text_count = 0, intersection_count = 0; |
4639 | 0 | for (int i = 0; i + gram_num <= text.size; i++) { |
4640 | 0 | uint32_t cur_hash = sub_str_hash(text.data + i, gram_num); |
4641 | 0 | auto& val = pattern_map[cur_hash]; |
4642 | 0 | if (val == not_found) { |
4643 | 0 | val ^= found_in_text; |
4644 | 0 | DCHECK(val == found_in_text); |
4645 | | // only found in text |
4646 | 0 | text_count++; |
4647 | 0 | restore_map.push_back(cur_hash); |
4648 | 0 | } else if (val == found_in_pattern) { |
4649 | 0 | val ^= found_in_text; |
4650 | 0 | DCHECK(val == found_in_pattern_and_text); |
4651 | | // found in text and pattern |
4652 | 0 | text_count++; |
4653 | 0 | intersection_count++; |
4654 | 0 | restore_map.push_back(cur_hash); |
4655 | 0 | } |
4656 | 0 | } |
4657 | | // Restore the pattern_map. |
4658 | 0 | for (auto& restore_hash : restore_map) { |
4659 | 0 | pattern_map[restore_hash] ^= found_in_text; |
4660 | 0 | } |
4661 | |
|
4662 | 0 | return {text_count, intersection_count}; |
4663 | 0 | } |
4664 | | }; |
4665 | | |
4666 | | class FunctionTranslate : public IFunction { |
4667 | | public: |
4668 | | static constexpr auto name = "translate"; |
4669 | | using AsciiMap = std::array<UInt8, 128>; |
4670 | | constexpr static UInt8 DELETE_CHAR = 255; // 255 means delete this char |
4671 | 8 | static FunctionPtr create() { return std::make_shared<FunctionTranslate>(); } |
4672 | 1 | String get_name() const override { return name; } |
4673 | 0 | size_t get_number_of_arguments() const override { return 3; } |
4674 | | |
4675 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
4676 | 0 | return std::make_shared<DataTypeString>(); |
4677 | 0 | }; |
4678 | | |
4679 | 7 | DataTypes get_variadic_argument_types_impl() const override { |
4680 | 7 | return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(), |
4681 | 7 | std::make_shared<DataTypeString>()}; |
4682 | 7 | } |
4683 | | |
4684 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
4685 | 0 | uint32_t result, size_t input_rows_count) const override { |
4686 | 0 | CHECK_EQ(arguments.size(), 3); |
4687 | 0 | auto col_res = ColumnString::create(); |
4688 | 0 | bool col_const[3]; |
4689 | 0 | ColumnPtr argument_columns[3]; |
4690 | 0 | for (int i = 0; i < 3; ++i) { |
4691 | 0 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); |
4692 | 0 | } |
4693 | 0 | argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( |
4694 | 0 | *block.get_by_position(arguments[0]).column) |
4695 | 0 | .convert_to_full_column() |
4696 | 0 | : block.get_by_position(arguments[0]).column; |
4697 | 0 | default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); |
4698 | |
|
4699 | 0 | const auto* col_source = assert_cast<const ColumnString*>(argument_columns[0].get()); |
4700 | 0 | const auto* col_from = assert_cast<const ColumnString*>(argument_columns[1].get()); |
4701 | 0 | const auto* col_to = assert_cast<const ColumnString*>(argument_columns[2].get()); |
4702 | |
|
4703 | 0 | bool is_ascii = col_source->is_ascii() && col_from->is_ascii() && col_to->is_ascii(); |
4704 | 0 | auto impl_vectors = impl_vectors_utf8<false>; |
4705 | 0 | if (col_const[1] && col_const[2] && is_ascii) { |
4706 | 0 | impl_vectors = impl_vectors_ascii<true>; |
4707 | 0 | } else if (col_const[1] && col_const[2]) { |
4708 | 0 | impl_vectors = impl_vectors_utf8<true>; |
4709 | 0 | } else if (is_ascii) { |
4710 | 0 | impl_vectors = impl_vectors_ascii<false>; |
4711 | 0 | } |
4712 | 0 | impl_vectors(col_source, col_from, col_to, col_res.get()); |
4713 | 0 | block.get_by_position(result).column = std::move(col_res); |
4714 | 0 | return Status::OK(); |
4715 | 0 | } |
4716 | | |
4717 | | private: |
4718 | | template <bool IsConst> |
4719 | | static void impl_vectors_ascii(const ColumnString* col_source, const ColumnString* col_from, |
4720 | 0 | const ColumnString* col_to, ColumnString* col_res) { |
4721 | 0 | auto& res_chars = col_res->get_chars(); |
4722 | 0 | auto& res_offsets = col_res->get_offsets(); |
4723 | 0 | res_chars.reserve(col_source->get_chars().size()); |
4724 | 0 | res_offsets.reserve(col_source->get_offsets().size()); |
4725 | 0 | DCHECK_EQ(col_res->size(), 0); |
4726 | 0 | AsciiMap map; |
4727 | 0 | if (IsConst) { |
4728 | 0 | const auto& from_str = col_from->get_data_at(0); |
4729 | 0 | const auto& to_str = col_to->get_data_at(0); |
4730 | 0 | if (!build_translate_map_ascii(map, from_str, to_str)) { |
4731 | | // if the map is not need delete char, we can directly copy the source string,then use map to translate |
4732 | 0 | res_offsets.insert(col_source->get_offsets().begin(), |
4733 | 0 | col_source->get_offsets().end()); |
4734 | 0 | res_chars.insert(col_source->get_chars().begin(), col_source->get_chars().end()); |
4735 | 0 | for (int i = 0; i < res_chars.size(); ++i) { |
4736 | 0 | res_chars[i] = map[res_chars[i]]; // translate the chars |
4737 | 0 | } |
4738 | 0 | return; // no need to translate |
4739 | 0 | } |
4740 | 0 | } |
4741 | | |
4742 | 0 | auto res_size = 0; |
4743 | 0 | auto* begin_data = col_res->get_chars().data(); |
4744 | 0 | for (size_t i = 0; i < col_source->size(); ++i) { |
4745 | 0 | const auto& source_str = col_source->get_data_at(i); |
4746 | 0 | if (!IsConst) { |
4747 | 0 | const auto& from_str = col_from->get_data_at(i); |
4748 | 0 | const auto& to_str = col_to->get_data_at(i); |
4749 | 0 | build_translate_map_ascii(map, from_str, to_str); |
4750 | 0 | } |
4751 | 0 | auto* dst_data = begin_data + res_size; |
4752 | 0 | res_size += translate_ascii(source_str, map, dst_data); |
4753 | |
|
4754 | 0 | res_offsets.push_back(res_size); |
4755 | 0 | } |
4756 | 0 | DCHECK_GE(res_chars.capacity(), res_size); |
4757 | 0 | res_chars.resize(res_size); |
4758 | 0 | } Unexecuted instantiation: _ZN5doris17FunctionTranslate18impl_vectors_asciiILb1EEEvPKNS_9ColumnStrIjEES5_S5_PS3_ Unexecuted instantiation: _ZN5doris17FunctionTranslate18impl_vectors_asciiILb0EEEvPKNS_9ColumnStrIjEES5_S5_PS3_ |
4759 | | |
4760 | | // return true if no need delete char |
4761 | | bool static build_translate_map_ascii(AsciiMap& map, const StringRef& from_str, |
4762 | 0 | const StringRef& to_str) { |
4763 | 0 | for (size_t i = 0; i < map.size(); ++i) { |
4764 | 0 | map[i] = i; // initialize map to identity |
4765 | 0 | } |
4766 | 0 | std::array<UInt8, 128> set_map {0}; |
4767 | 0 | const auto min_size = std::min(from_str.size, to_str.size); |
4768 | | // all ascii characters are in the range [0, 127] |
4769 | 0 | for (size_t i = 0; i < min_size; ++i) { |
4770 | 0 | auto from_char = from_str.data[i]; |
4771 | 0 | auto to_char = to_str.data[i]; |
4772 | 0 | if (set_map[from_char] == 0) { |
4773 | 0 | set_map[from_char] = 1; |
4774 | 0 | map[from_char] = to_char; |
4775 | 0 | } |
4776 | 0 | } |
4777 | |
|
4778 | 0 | bool need_delete_char = false; |
4779 | |
|
4780 | 0 | for (size_t i = min_size; i < from_str.size; ++i) { |
4781 | 0 | auto from_char = from_str.data[i]; |
4782 | 0 | if (set_map[from_char] == 0) { |
4783 | 0 | set_map[from_char] = 1; |
4784 | 0 | map[from_char] = DELETE_CHAR; // delete this char |
4785 | 0 | need_delete_char = true; |
4786 | 0 | } |
4787 | 0 | } |
4788 | 0 | return need_delete_char; |
4789 | 0 | } |
4790 | | |
4791 | 0 | static size_t translate_ascii(const StringRef& source_str, AsciiMap& map, UInt8* dst_data) { |
4792 | 0 | auto* begin_data = dst_data; |
4793 | 0 | for (size_t i = 0; i < source_str.size; ++i) { |
4794 | 0 | auto c = source_str.data[i]; |
4795 | 0 | if (map[c] == DELETE_CHAR) { |
4796 | 0 | continue; // delete this char |
4797 | 0 | } |
4798 | 0 | *dst_data++ = map[c]; |
4799 | 0 | } |
4800 | 0 | return dst_data - begin_data; |
4801 | 0 | } |
4802 | | |
4803 | | template <bool IsConst> |
4804 | | static void impl_vectors_utf8(const ColumnString* col_source, const ColumnString* col_from, |
4805 | 0 | const ColumnString* col_to, ColumnString* col_res) { |
4806 | 0 | col_res->get_chars().reserve(col_source->get_chars().size()); |
4807 | 0 | col_res->get_offsets().reserve(col_source->get_offsets().size()); |
4808 | 0 | std::unordered_map<std::string_view, std::string_view> translate_map; |
4809 | 0 | if (IsConst) { |
4810 | 0 | const auto& from_str = col_from->get_data_at(0); |
4811 | 0 | const auto& to_str = col_to->get_data_at(0); |
4812 | 0 | translate_map = |
4813 | 0 | build_translate_map_utf8(from_str.to_string_view(), to_str.to_string_view()); |
4814 | 0 | } |
4815 | 0 | for (size_t i = 0; i < col_source->size(); ++i) { |
4816 | 0 | const auto& source_str = col_source->get_data_at(i); |
4817 | 0 | if (!IsConst) { |
4818 | 0 | const auto& from_str = col_from->get_data_at(i); |
4819 | 0 | const auto& to_str = col_to->get_data_at(i); |
4820 | 0 | translate_map = build_translate_map_utf8(from_str.to_string_view(), |
4821 | 0 | to_str.to_string_view()); |
4822 | 0 | } |
4823 | 0 | auto translated_str = translate_utf8(source_str.to_string_view(), translate_map); |
4824 | 0 | col_res->insert_data(translated_str.data(), translated_str.size()); |
4825 | 0 | } |
4826 | 0 | } Unexecuted instantiation: _ZN5doris17FunctionTranslate17impl_vectors_utf8ILb0EEEvPKNS_9ColumnStrIjEES5_S5_PS3_ Unexecuted instantiation: _ZN5doris17FunctionTranslate17impl_vectors_utf8ILb1EEEvPKNS_9ColumnStrIjEES5_S5_PS3_ |
4827 | | |
4828 | | static std::unordered_map<std::string_view, std::string_view> build_translate_map_utf8( |
4829 | 0 | const std::string_view& from_str, const std::string_view& to_str) { |
4830 | 0 | std::unordered_map<std::string_view, std::string_view> translate_map; |
4831 | 0 | for (size_t i = 0, from_char_size = 0, j = 0, to_char_size = 0; i < from_str.size(); |
4832 | 0 | i += from_char_size, j += to_char_size) { |
4833 | 0 | from_char_size = get_utf8_byte_length(from_str[i]); |
4834 | 0 | to_char_size = j < to_str.size() ? get_utf8_byte_length(to_str[j]) : 0; |
4835 | 0 | auto from_char = from_str.substr(i, from_char_size); |
4836 | 0 | if (translate_map.find(from_char) == translate_map.end()) { |
4837 | 0 | translate_map[from_char] = |
4838 | 0 | j < to_str.size() ? to_str.substr(j, to_char_size) : std::string_view(); |
4839 | 0 | } |
4840 | 0 | } |
4841 | 0 | return translate_map; |
4842 | 0 | } |
4843 | | |
4844 | | static std::string translate_utf8( |
4845 | | const std::string_view& source_str, |
4846 | 0 | std::unordered_map<std::string_view, std::string_view>& translate_map) { |
4847 | 0 | std::string result; |
4848 | 0 | result.reserve(source_str.size()); |
4849 | 0 | for (size_t i = 0, char_size = 0; i < source_str.size(); i += char_size) { |
4850 | 0 | char_size = get_utf8_byte_length(source_str[i]); |
4851 | 0 | auto c = source_str.substr(i, char_size); |
4852 | 0 | if (translate_map.find(c) != translate_map.end()) { |
4853 | 0 | if (!translate_map[c].empty()) { |
4854 | 0 | result.append(translate_map[c]); |
4855 | 0 | } |
4856 | 0 | } else { |
4857 | 0 | result.append(c); |
4858 | 0 | } |
4859 | 0 | } |
4860 | 0 | return result; |
4861 | 0 | } |
4862 | | }; |
4863 | | |
4864 | | /// xpath_string(xml, xpath) -> String |
4865 | | /// Returns the text content of the first node that matches the XPath expression. |
4866 | | /// Returns NULL if either xml or xpath is NULL. |
4867 | | /// Returns empty string if the XPath expression matches no nodes. |
4868 | | /// The text content includes the node and all its descendants. |
4869 | | /// Example: |
4870 | | /// xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[1]') = 'b1' |
4871 | | /// xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[2]') = 'b2' |
4872 | | /// xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/c') = '' |
4873 | | /// xpath_string('invalid xml', '/a/b[1]') = NULL |
4874 | | /// xpath_string(NULL, '/a/b[1]') = NULL |
4875 | | /// xpath_string('<a><b>b1</b><b>b2</b></a>', NULL) = NULL |
4876 | | class FunctionXPathString : public IFunction { |
4877 | | public: |
4878 | | static constexpr auto name = "xpath_string"; |
4879 | 84 | static FunctionPtr create() { return std::make_shared<FunctionXPathString>(); } |
4880 | 1 | String get_name() const override { return name; } |
4881 | 76 | size_t get_number_of_arguments() const override { return 2; } |
4882 | 76 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
4883 | 76 | return make_nullable(std::make_shared<DataTypeString>()); |
4884 | 76 | } |
4885 | | |
4886 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
4887 | 67 | uint32_t result, size_t input_rows_count) const override { |
4888 | 67 | CHECK_EQ(arguments.size(), 2); |
4889 | 67 | auto col_res = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create()); |
4890 | 67 | const auto& [left_col, left_const] = |
4891 | 67 | unpack_if_const(block.get_by_position(arguments[0]).column); |
4892 | 67 | const auto& [right_col, right_const] = |
4893 | 67 | unpack_if_const(block.get_by_position(arguments[1]).column); |
4894 | 67 | const auto& xml_col = *assert_cast<const ColumnString*>(left_col.get()); |
4895 | 67 | const auto& xpath_col = *assert_cast<const ColumnString*>(right_col.get()); |
4896 | | |
4897 | 67 | Status status; |
4898 | 67 | if (left_const && right_const) { |
4899 | 0 | status = execute_vector<true, true>(input_rows_count, xml_col, xpath_col, *col_res); |
4900 | 67 | } else if (left_const) { |
4901 | 22 | status = execute_vector<true, false>(input_rows_count, xml_col, xpath_col, *col_res); |
4902 | 45 | } else if (right_const) { |
4903 | 22 | status = execute_vector<false, true>(input_rows_count, xml_col, xpath_col, *col_res); |
4904 | 23 | } else { |
4905 | 23 | status = execute_vector<false, false>(input_rows_count, xml_col, xpath_col, *col_res); |
4906 | 23 | } |
4907 | 67 | if (!status.ok()) { |
4908 | 0 | return status; |
4909 | 0 | } |
4910 | | |
4911 | 67 | block.get_by_position(result).column = std::move(col_res); |
4912 | 67 | return Status::OK(); |
4913 | 67 | } |
4914 | | |
4915 | | private: |
4916 | 81 | static Status parse_xml(const StringRef& xml_str, pugi::xml_document& xml_doc) { |
4917 | 81 | pugi::xml_parse_result result = xml_doc.load_buffer(xml_str.data, xml_str.size); |
4918 | 81 | if (!result) { |
4919 | 0 | return Status::InvalidArgument("Function {} failed to parse XML string: {}", name, |
4920 | 0 | result.description()); |
4921 | 0 | } |
4922 | 81 | return Status::OK(); |
4923 | 81 | } |
4924 | | |
4925 | 84 | static Status build_xpath_query(const StringRef& xpath_str, pugi::xpath_query& xpath_query) { |
4926 | | // xpath_query will throws xpath_exception on compilation errors. |
4927 | 84 | try { |
4928 | | // NOTE!!!: don't use to_string_view(), because xpath_str maybe not null-terminated |
4929 | 84 | xpath_query = pugi::xpath_query(xpath_str.to_string().c_str()); |
4930 | 84 | } catch (const pugi::xpath_exception& e) { |
4931 | 0 | return Status::InvalidArgument("Function {} failed to build XPath query: {}", name, |
4932 | 0 | e.what()); |
4933 | 0 | } |
4934 | 84 | return Status::OK(); |
4935 | 84 | } |
4936 | | |
4937 | | template <bool left_const, bool right_const> |
4938 | | static Status execute_vector(const size_t input_rows_count, const ColumnString& xml_col, |
4939 | 67 | const ColumnString& xpath_col, ColumnNullable& res_col) { |
4940 | 67 | pugi::xml_document xml_doc; |
4941 | 67 | pugi::xpath_query xpath_query; |
4942 | | // first check right_const, because we want to check empty input first |
4943 | 67 | if constexpr (right_const) { |
4944 | 22 | auto xpath_str = xpath_col.get_data_at(0); |
4945 | 22 | if (xpath_str.empty()) { |
4946 | | // should return null if xpath_str is empty |
4947 | 1 | res_col.insert_many_defaults(input_rows_count); |
4948 | 1 | return Status::OK(); |
4949 | 1 | } |
4950 | 21 | RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query)); |
4951 | 21 | } |
4952 | 22 | if constexpr (left_const) { |
4953 | 22 | auto xml_str = xml_col.get_data_at(0); |
4954 | 22 | if (xml_str.empty()) { |
4955 | | // should return null if xml_str is empty |
4956 | 1 | res_col.insert_many_defaults(input_rows_count); |
4957 | 1 | return Status::OK(); |
4958 | 1 | } |
4959 | 21 | RETURN_IF_ERROR(parse_xml(xml_str, xml_doc)); |
4960 | 21 | } |
4961 | | |
4962 | 156 | for (size_t i = 0; i < input_rows_count; ++i) { |
4963 | 89 | if constexpr (!right_const) { |
4964 | 68 | auto xpath_str = xpath_col.get_data_at(i); |
4965 | 68 | if (xpath_str.empty()) { |
4966 | | // should return null if xpath_str is empty |
4967 | 5 | res_col.insert_default(); |
4968 | 5 | continue; |
4969 | 5 | } |
4970 | 63 | RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query)); |
4971 | 63 | } |
4972 | 68 | if constexpr (!left_const) { |
4973 | 68 | auto xml_str = xml_col.get_data_at(i); |
4974 | 68 | if (xml_str.empty()) { |
4975 | | // should return null if xml_str is empty |
4976 | 4 | res_col.insert_default(); |
4977 | 4 | continue; |
4978 | 4 | } |
4979 | 64 | RETURN_IF_ERROR(parse_xml(xml_str, xml_doc)); |
4980 | 64 | } |
4981 | 64 | std::string text; |
4982 | 89 | try { |
4983 | 89 | text = xpath_query.evaluate_string(xml_doc); |
4984 | 89 | } catch (const pugi::xpath_exception& e) { |
4985 | 0 | return Status::InvalidArgument("Function {} failed to query XPath string: {}", name, |
4986 | 0 | e.what()); |
4987 | 0 | } |
4988 | 80 | res_col.insert_data(text.data(), text.size()); |
4989 | 80 | } |
4990 | 67 | return Status::OK(); |
4991 | 67 | } Unexecuted instantiation: _ZN5doris19FunctionXPathString14execute_vectorILb1ELb1EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE _ZN5doris19FunctionXPathString14execute_vectorILb1ELb0EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE Line | Count | Source | 4939 | 22 | const ColumnString& xpath_col, ColumnNullable& res_col) { | 4940 | 22 | pugi::xml_document xml_doc; | 4941 | 22 | pugi::xpath_query xpath_query; | 4942 | | // first check right_const, because we want to check empty input first | 4943 | | if constexpr (right_const) { | 4944 | | auto xpath_str = xpath_col.get_data_at(0); | 4945 | | if (xpath_str.empty()) { | 4946 | | // should return null if xpath_str is empty | 4947 | | res_col.insert_many_defaults(input_rows_count); | 4948 | | return Status::OK(); | 4949 | | } | 4950 | | RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query)); | 4951 | | } | 4952 | 22 | if constexpr (left_const) { | 4953 | 22 | auto xml_str = xml_col.get_data_at(0); | 4954 | 22 | if (xml_str.empty()) { | 4955 | | // should return null if xml_str is empty | 4956 | 1 | res_col.insert_many_defaults(input_rows_count); | 4957 | 1 | return Status::OK(); | 4958 | 1 | } | 4959 | 21 | RETURN_IF_ERROR(parse_xml(xml_str, xml_doc)); | 4960 | 21 | } | 4961 | | | 4962 | 43 | for (size_t i = 0; i < input_rows_count; ++i) { | 4963 | 21 | if constexpr (!right_const) { | 4964 | 21 | auto xpath_str = xpath_col.get_data_at(i); | 4965 | 21 | if (xpath_str.empty()) { | 4966 | | // should return null if xpath_str is empty | 4967 | 1 | res_col.insert_default(); | 4968 | 1 | continue; | 4969 | 1 | } | 4970 | 20 | RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query)); | 4971 | 20 | } | 4972 | | if constexpr (!left_const) { | 4973 | | auto xml_str = xml_col.get_data_at(i); | 4974 | | if (xml_str.empty()) { | 4975 | | // should return null if xml_str is empty | 4976 | | res_col.insert_default(); | 4977 | | continue; | 4978 | | } | 4979 | | 
RETURN_IF_ERROR(parse_xml(xml_str, xml_doc)); | 4980 | | } | 4981 | 21 | std::string text; | 4982 | 21 | try { | 4983 | 21 | text = xpath_query.evaluate_string(xml_doc); | 4984 | 21 | } catch (const pugi::xpath_exception& e) { | 4985 | 0 | return Status::InvalidArgument("Function {} failed to query XPath string: {}", name, | 4986 | 0 | e.what()); | 4987 | 0 | } | 4988 | 20 | res_col.insert_data(text.data(), text.size()); | 4989 | 20 | } | 4990 | 22 | return Status::OK(); | 4991 | 22 | } |
_ZN5doris19FunctionXPathString14execute_vectorILb0ELb1EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE Line | Count | Source | 4939 | 22 | const ColumnString& xpath_col, ColumnNullable& res_col) { | 4940 | 22 | pugi::xml_document xml_doc; | 4941 | 22 | pugi::xpath_query xpath_query; | 4942 | | // first check right_const, because we want to check empty input first | 4943 | 22 | if constexpr (right_const) { | 4944 | 22 | auto xpath_str = xpath_col.get_data_at(0); | 4945 | 22 | if (xpath_str.empty()) { | 4946 | | // should return null if xpath_str is empty | 4947 | 1 | res_col.insert_many_defaults(input_rows_count); | 4948 | 1 | return Status::OK(); | 4949 | 1 | } | 4950 | 21 | RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query)); | 4951 | 21 | } | 4952 | | if constexpr (left_const) { | 4953 | | auto xml_str = xml_col.get_data_at(0); | 4954 | | if (xml_str.empty()) { | 4955 | | // should return null if xml_str is empty | 4956 | | res_col.insert_many_defaults(input_rows_count); | 4957 | | return Status::OK(); | 4958 | | } | 4959 | | RETURN_IF_ERROR(parse_xml(xml_str, xml_doc)); | 4960 | | } | 4961 | | | 4962 | 43 | for (size_t i = 0; i < input_rows_count; ++i) { | 4963 | | if constexpr (!right_const) { | 4964 | | auto xpath_str = xpath_col.get_data_at(i); | 4965 | | if (xpath_str.empty()) { | 4966 | | // should return null if xpath_str is empty | 4967 | | res_col.insert_default(); | 4968 | | continue; | 4969 | | } | 4970 | | RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query)); | 4971 | | } | 4972 | 21 | if constexpr (!left_const) { | 4973 | 21 | auto xml_str = xml_col.get_data_at(i); | 4974 | 21 | if (xml_str.empty()) { | 4975 | | // should return null if xml_str is empty | 4976 | 1 | res_col.insert_default(); | 4977 | 1 | continue; | 4978 | 1 | } | 4979 | 20 | RETURN_IF_ERROR(parse_xml(xml_str, xml_doc)); | 4980 | 20 | } | 4981 | 20 | std::string text; | 4982 | 21 | try { | 4983 | 21 | text = xpath_query.evaluate_string(xml_doc); | 4984 | 21 | } 
catch (const pugi::xpath_exception& e) { | 4985 | 0 | return Status::InvalidArgument("Function {} failed to query XPath string: {}", name, | 4986 | 0 | e.what()); | 4987 | 0 | } | 4988 | 20 | res_col.insert_data(text.data(), text.size()); | 4989 | 20 | } | 4990 | 22 | return Status::OK(); | 4991 | 22 | } |
_ZN5doris19FunctionXPathString14execute_vectorILb0ELb0EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE Line | Count | Source | 4939 | 23 | const ColumnString& xpath_col, ColumnNullable& res_col) { | 4940 | 23 | pugi::xml_document xml_doc; | 4941 | 23 | pugi::xpath_query xpath_query; | 4942 | | // first check right_const, because we want to check empty input first | 4943 | | if constexpr (right_const) { | 4944 | | auto xpath_str = xpath_col.get_data_at(0); | 4945 | | if (xpath_str.empty()) { | 4946 | | // should return null if xpath_str is empty | 4947 | | res_col.insert_many_defaults(input_rows_count); | 4948 | | return Status::OK(); | 4949 | | } | 4950 | | RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query)); | 4951 | | } | 4952 | | if constexpr (left_const) { | 4953 | | auto xml_str = xml_col.get_data_at(0); | 4954 | | if (xml_str.empty()) { | 4955 | | // should return null if xml_str is empty | 4956 | | res_col.insert_many_defaults(input_rows_count); | 4957 | | return Status::OK(); | 4958 | | } | 4959 | | RETURN_IF_ERROR(parse_xml(xml_str, xml_doc)); | 4960 | | } | 4961 | | | 4962 | 70 | for (size_t i = 0; i < input_rows_count; ++i) { | 4963 | 47 | if constexpr (!right_const) { | 4964 | 47 | auto xpath_str = xpath_col.get_data_at(i); | 4965 | 47 | if (xpath_str.empty()) { | 4966 | | // should return null if xpath_str is empty | 4967 | 4 | res_col.insert_default(); | 4968 | 4 | continue; | 4969 | 4 | } | 4970 | 43 | RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query)); | 4971 | 43 | } | 4972 | 47 | if constexpr (!left_const) { | 4973 | 47 | auto xml_str = xml_col.get_data_at(i); | 4974 | 47 | if (xml_str.empty()) { | 4975 | | // should return null if xml_str is empty | 4976 | 3 | res_col.insert_default(); | 4977 | 3 | continue; | 4978 | 3 | } | 4979 | 44 | RETURN_IF_ERROR(parse_xml(xml_str, xml_doc)); | 4980 | 44 | } | 4981 | 44 | std::string text; | 4982 | 47 | try { | 4983 | 47 | text = xpath_query.evaluate_string(xml_doc); | 4984 | 47 | } 
catch (const pugi::xpath_exception& e) { | 4985 | 0 | return Status::InvalidArgument("Function {} failed to query XPath string: {}", name, | 4986 | 0 | e.what()); | 4987 | 0 | } | 4988 | 40 | res_col.insert_data(text.data(), text.size()); | 4989 | 40 | } | 4990 | 23 | return Status::OK(); | 4991 | 23 | } |
|
4992 | | }; |
4993 | | |
4994 | | class MakeSetImpl { |
4995 | | public: |
4996 | | static constexpr auto name = "make_set"; |
4997 | | |
4998 | 0 | static size_t get_number_of_arguments() { return 0; } |
4999 | 1 | static bool is_variadic() { return true; } |
5000 | 0 | static DataTypePtr get_return_type_impl(const DataTypes& arguments) { |
5001 | 0 | if (arguments[0].get()->is_nullable()) { |
5002 | 0 | return make_nullable(std::make_shared<DataTypeString>()); |
5003 | 0 | } |
5004 | 0 | return std::make_shared<DataTypeString>(); |
5005 | 0 | } |
5006 | | |
5007 | | static bool is_return_nullable(bool has_nullable, |
5008 | 0 | const std::vector<ColumnWithConstAndNullMap>& cols_info) { |
5009 | 0 | return cols_info[0].null_map != nullptr; |
5010 | 0 | } |
5011 | | |
5012 | | static bool execute_const_null(ColumnString::MutablePtr& res_col, |
5013 | | PaddedPODArray<UInt8>& res_null_map_data, |
5014 | 0 | size_t input_rows_count, size_t null_index) { |
5015 | 0 | if (null_index == 1) { |
5016 | 0 | res_col->insert_many_defaults(input_rows_count); |
5017 | 0 | res_null_map_data.assign(input_rows_count, (UInt8)1); |
5018 | 0 | return true; |
5019 | 0 | } |
5020 | 0 | return false; |
5021 | 0 | } |
5022 | | |
5023 | | static void execute(const std::vector<ColumnWithConstAndNullMap>& column_infos, |
5024 | | ColumnString::MutablePtr& res_col, PaddedPODArray<UInt8>& res_null_map_data, |
5025 | 0 | size_t input_rows_count) { |
5026 | 0 | static constexpr char SEPARATOR = ','; |
5027 | 0 | const auto& bit_data = |
5028 | 0 | assert_cast<const ColumnInt64&>(*column_infos[0].nested_col).get_data(); |
5029 | 0 | std::vector<const ColumnString*> str_cols(column_infos.size()); |
5030 | 0 | for (size_t i = 1; i < column_infos.size(); ++i) { |
5031 | 0 | str_cols[i] = assert_cast<const ColumnString*>(column_infos[i].nested_col); |
5032 | 0 | } |
5033 | |
|
5034 | 0 | for (size_t row = 0; row < input_rows_count; ++row) { |
5035 | 0 | if (column_infos[0].is_null_at(row)) { |
5036 | 0 | res_col->insert_default(); |
5037 | 0 | res_null_map_data[row] = 1; |
5038 | 0 | continue; |
5039 | 0 | } |
5040 | | |
5041 | 0 | uint64_t bit = bit_data[column_infos[0].is_const ? 0 : row]; |
5042 | 0 | uint64_t col_pos = __builtin_ffsll(bit); |
5043 | 0 | ColumnString::Chars data; |
5044 | 0 | while (col_pos != 0 && col_pos < column_infos.size() && bit != 0) { |
5045 | 0 | if (!column_infos[col_pos].is_null_at(row)) { |
5046 | | /* Here insert `str,` directly to support the case below: |
5047 | | * SELECT MAKE_SET(3, '', 'a'); |
5048 | | * the exception result should be ',a'. |
5049 | | */ |
5050 | 0 | auto s_ref = str_cols[col_pos]->get_data_at( |
5051 | 0 | column_infos[col_pos].is_const ? 0 : row); |
5052 | 0 | data.insert(s_ref.data, s_ref.data + s_ref.size); |
5053 | 0 | data.push_back(SEPARATOR); |
5054 | 0 | } |
5055 | 0 | bit &= ~(1ULL << (col_pos - 1)); |
5056 | 0 | col_pos = __builtin_ffsll(bit); |
5057 | 0 | } |
5058 | | // remove the last ',' |
5059 | 0 | if (!data.empty()) { |
5060 | 0 | data.pop_back(); |
5061 | 0 | } |
5062 | 0 | res_col->insert_data(reinterpret_cast<const char*>(data.data()), data.size()); |
5063 | 0 | } |
5064 | 0 | } |
5065 | | }; |
5066 | | |
5067 | | class FunctionExportSet : public IFunction { |
5068 | | public: |
5069 | | static constexpr auto name = "export_set"; |
5070 | 8 | static FunctionPtr create() { return std::make_shared<FunctionExportSet>(); } |
5071 | 0 | String get_name() const override { return name; } |
5072 | 0 | size_t get_number_of_arguments() const override { return 0; } |
5073 | 1 | bool is_variadic() const override { return true; } |
5074 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
5075 | 0 | return std::make_shared<DataTypeString>(); |
5076 | 0 | } |
5077 | | |
5078 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
5079 | 0 | uint32_t result, size_t input_rows_count) const override { |
5080 | 0 | auto res_col = ColumnString::create(); |
5081 | |
|
5082 | 0 | const size_t arg_size = arguments.size(); |
5083 | 0 | bool col_const[5]; |
5084 | 0 | ColumnPtr arg_cols[5]; |
5085 | 0 | bool all_const = true; |
5086 | 0 | for (int i = 1; i < arg_size; ++i) { |
5087 | 0 | col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); |
5088 | 0 | all_const = all_const && col_const[i]; |
5089 | 0 | } |
5090 | 0 | std::tie(arg_cols[0], col_const[0]) = |
5091 | 0 | unpack_if_const(block.get_by_position(arguments[0]).column); |
5092 | 0 | if (arg_size == 3) { |
5093 | 0 | default_preprocess_parameter_columns(arg_cols, col_const, {1, 2}, block, arguments); |
5094 | 0 | } else if (arg_size == 4) { |
5095 | 0 | default_preprocess_parameter_columns(arg_cols, col_const, {1, 2, 3}, block, arguments); |
5096 | 0 | } else if (arg_size == 5) { |
5097 | 0 | default_preprocess_parameter_columns(arg_cols, col_const, {1, 2, 3, 4}, block, |
5098 | 0 | arguments); |
5099 | 0 | } |
5100 | |
|
5101 | 0 | const auto* bit_col = assert_cast<const ColumnInt128*>(arg_cols[0].get()); |
5102 | 0 | const auto* on_col = assert_cast<const ColumnString*>(arg_cols[1].get()); |
5103 | 0 | const auto* off_col = assert_cast<const ColumnString*>(arg_cols[2].get()); |
5104 | 0 | const ColumnString* sep_col = nullptr; |
5105 | 0 | const ColumnInt32* num_bits_col = nullptr; |
5106 | 0 | if (arg_size > 3) { |
5107 | 0 | sep_col = assert_cast<const ColumnString*>(arg_cols[3].get()); |
5108 | 0 | if (arg_size == 5) { |
5109 | 0 | num_bits_col = assert_cast<const ColumnInt32*>(arg_cols[4].get()); |
5110 | 0 | } |
5111 | 0 | } |
5112 | |
|
5113 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
5114 | 0 | uint64_t bit = |
5115 | 0 | check_and_get_bit(bit_col->get_element(index_check_const(i, col_const[0]))); |
5116 | |
|
5117 | 0 | size_t idx_for_args = all_const ? 0 : i; |
5118 | 0 | StringRef on = on_col->get_data_at(idx_for_args); |
5119 | 0 | StringRef off = off_col->get_data_at(idx_for_args); |
5120 | 0 | StringRef separator(",", 1); |
5121 | 0 | int8_t num_of_bits = 64; |
5122 | |
|
5123 | 0 | if (arg_size > 3) { |
5124 | 0 | separator = sep_col->get_data_at(idx_for_args); |
5125 | 0 | if (arg_size == 5) { |
5126 | 0 | num_of_bits = |
5127 | 0 | check_and_get_num_of_bits(num_bits_col->get_element(idx_for_args)); |
5128 | 0 | } |
5129 | 0 | } |
5130 | |
|
5131 | 0 | execute_single(bit, on, off, separator, num_of_bits, *res_col); |
5132 | 0 | } |
5133 | 0 | block.replace_by_position(result, std::move(res_col)); |
5134 | 0 | return Status::OK(); |
5135 | 0 | } |
5136 | | |
5137 | | private: |
5138 | | /* The valid range of the input `bit` parameter should be [-2^63, 2^64 - 1] |
5139 | | * If it exceeds this range, the MAX/MIN values of the signed 64-bit integer are used for calculation |
5140 | | * This behavior is consistent with MySQL. |
5141 | | */ |
5142 | 0 | uint64_t check_and_get_bit(__int128 col_bit_val) const { |
5143 | 0 | if (col_bit_val > ULLONG_MAX) { |
5144 | 0 | return LLONG_MAX; |
5145 | 0 | } else if (col_bit_val < LLONG_MIN) { |
5146 | 0 | return LLONG_MIN; |
5147 | 0 | } |
5148 | 0 | return static_cast<uint64_t>(col_bit_val); |
5149 | 0 | } |
5150 | | |
5151 | | // If the input value is not in the range [0, 64], return default value 64 |
5152 | 0 | int8_t check_and_get_num_of_bits(int32_t col_num_of_bits_val) const { |
5153 | 0 | if (col_num_of_bits_val >= 0 && col_num_of_bits_val <= 64) { |
5154 | 0 | return static_cast<int8_t>(col_num_of_bits_val); |
5155 | 0 | } |
5156 | 0 | return 64; |
5157 | 0 | } |
5158 | | |
5159 | | void execute_single(uint64_t bit, const StringRef& on, const StringRef& off, |
5160 | | const StringRef& separator, int8_t num_of_bits, |
5161 | 0 | ColumnString& res_col) const { |
5162 | 0 | ColumnString::Chars data; |
5163 | 0 | data.reserve(std::max(on.size, off.size) * num_of_bits + |
5164 | 0 | separator.size * (num_of_bits - 1)); |
5165 | |
|
5166 | 0 | while (bit && num_of_bits) { |
5167 | 0 | if (bit & 1) { |
5168 | 0 | data.insert(on.data, on.data + on.size); |
5169 | 0 | } else { |
5170 | 0 | data.insert(off.data, off.data + off.size); |
5171 | 0 | } |
5172 | 0 | bit >>= 1; |
5173 | 0 | if (--num_of_bits) { |
5174 | 0 | data.insert(separator.data, separator.data + separator.size); |
5175 | 0 | } |
5176 | 0 | } |
5177 | |
|
5178 | 0 | if (num_of_bits > 0) { |
5179 | 0 | ColumnString::Chars off_sep_combo; |
5180 | 0 | off_sep_combo.reserve(separator.size + off.size); |
5181 | 0 | off_sep_combo.insert(off_sep_combo.end(), off.data, off.data + off.size); |
5182 | 0 | off_sep_combo.insert(off_sep_combo.end(), separator.data, |
5183 | 0 | separator.data + separator.size); |
5184 | |
|
5185 | 0 | for (size_t i = 0; i < num_of_bits; ++i) { |
5186 | 0 | data.insert(off_sep_combo.data(), off_sep_combo.data() + off_sep_combo.size()); |
5187 | 0 | } |
5188 | 0 | data.erase(data.end() - separator.size, data.end()); |
5189 | 0 | } |
5190 | |
|
5191 | 0 | res_col.insert_data(reinterpret_cast<const char*>(data.data()), data.size()); |
5192 | 0 | } |
5193 | | }; |
5194 | | |
5195 | | // ATTN: for debug only |
5196 | | // compute crc32 hash value as the same way in `VOlapTablePartitionParam::find_tablets()` |
5197 | | class FunctionCrc32Internal : public IFunction { |
5198 | | public: |
5199 | | static constexpr auto name = "crc32_internal"; |
5200 | 8 | static FunctionPtr create() { return std::make_shared<FunctionCrc32Internal>(); } |
5201 | 0 | String get_name() const override { return name; } |
5202 | 0 | size_t get_number_of_arguments() const override { return 0; } |
5203 | 1 | bool is_variadic() const override { return true; } |
5204 | 0 | bool use_default_implementation_for_nulls() const override { return false; } |
5205 | 0 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
5206 | 0 | return std::make_shared<DataTypeInt64>(); |
5207 | 0 | } |
5208 | | |
5209 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
5210 | 0 | uint32_t result, size_t input_rows_count) const override { |
5211 | 0 | DCHECK_GE(arguments.size(), 1); |
5212 | |
|
5213 | 0 | auto argument_size = arguments.size(); |
5214 | 0 | std::vector<ColumnPtr> argument_columns(argument_size); |
5215 | 0 | std::vector<PrimitiveType> argument_primitive_types(argument_size); |
5216 | |
|
5217 | 0 | for (size_t i = 0; i < argument_size; ++i) { |
5218 | 0 | argument_columns[i] = |
5219 | 0 | block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); |
5220 | 0 | argument_primitive_types[i] = |
5221 | 0 | block.get_by_position(arguments[i]).type->get_primitive_type(); |
5222 | 0 | } |
5223 | |
|
5224 | 0 | auto res_col = ColumnInt64::create(); |
5225 | 0 | auto& res_data = res_col->get_data(); |
5226 | 0 | res_data.resize_fill(input_rows_count, 0); |
5227 | |
|
5228 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
5229 | 0 | uint32_t hash_val = 0; |
5230 | 0 | for (size_t j = 0; j < argument_size; ++j) { |
5231 | 0 | const auto& column = argument_columns[j]; |
5232 | 0 | auto primitive_type = argument_primitive_types[j]; |
5233 | 0 | auto val = column->get_data_at(i); |
5234 | 0 | if (val.data != nullptr) { |
5235 | 0 | hash_val = RawValue::zlib_crc32(val.data, val.size, primitive_type, hash_val); |
5236 | 0 | } else { |
5237 | 0 | hash_val = HashUtil::zlib_crc_hash_null(hash_val); |
5238 | 0 | } |
5239 | 0 | } |
5240 | 0 | res_data[i] = hash_val; |
5241 | 0 | } |
5242 | |
|
5243 | 0 | block.replace_by_position(result, std::move(res_col)); |
5244 | 0 | return Status::OK(); |
5245 | 0 | } |
5246 | | }; |
5247 | | |
5248 | | class FunctionUnicodeNormalize : public IFunction { |
5249 | | public: |
5250 | | static constexpr auto name = "unicode_normalize"; |
5251 | | |
5252 | 15 | static FunctionPtr create() { return std::make_shared<FunctionUnicodeNormalize>(); } |
5253 | | |
5254 | 3 | String get_name() const override { return name; } |
5255 | | |
5256 | 7 | size_t get_number_of_arguments() const override { return 2; } |
5257 | | |
5258 | 7 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
5259 | 7 | if (arguments.size() != 2 || !is_string_type(arguments[0]->get_primitive_type()) || |
5260 | 7 | !is_string_type(arguments[1]->get_primitive_type())) { |
5261 | 0 | throw doris::Exception(ErrorCode::INVALID_ARGUMENT, |
5262 | 0 | "Illegal type {} and {} of arguments of function {}", |
5263 | 0 | arguments[0]->get_name(), arguments[1]->get_name(), get_name()); |
5264 | 0 | } |
5265 | 7 | return arguments[0]; |
5266 | 7 | } |
5267 | | |
5268 | 10 | ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
5269 | | |
5270 | 12 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
5271 | 12 | if (scope == FunctionContext::THREAD_LOCAL) { |
5272 | 5 | return Status::OK(); |
5273 | 5 | } |
5274 | | |
5275 | 7 | if (!context->is_col_constant(1)) { |
5276 | 1 | return Status::InvalidArgument( |
5277 | 1 | "The second argument 'mode' of function {} must be constant", get_name()); |
5278 | 1 | } |
5279 | | |
5280 | 6 | auto* const_col = context->get_constant_col(1); |
5281 | 6 | auto mode_ref = const_col->column_ptr->get_data_at(0); |
5282 | 6 | std::string lower_mode = doris::to_lower(std::string(doris::trim(mode_ref.to_string()))); |
5283 | | |
5284 | 6 | UErrorCode status = U_ZERO_ERROR; |
5285 | 6 | const icu::Normalizer2* normalizer = nullptr; |
5286 | | |
5287 | 6 | if (lower_mode == "nfc") { |
5288 | 2 | normalizer = icu::Normalizer2::getInstance(nullptr, "nfc", UNORM2_COMPOSE, status); |
5289 | 4 | } else if (lower_mode == "nfd") { |
5290 | 1 | normalizer = icu::Normalizer2::getNFDInstance(status); |
5291 | 3 | } else if (lower_mode == "nfkc") { |
5292 | 0 | normalizer = icu::Normalizer2::getInstance(nullptr, "nfkc", UNORM2_COMPOSE, status); |
5293 | 3 | } else if (lower_mode == "nfkd") { |
5294 | 1 | normalizer = icu::Normalizer2::getNFKDInstance(status); |
5295 | 2 | } else if (lower_mode == "nfkc_cf") { |
5296 | 1 | normalizer = icu::Normalizer2::getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, status); |
5297 | 1 | } else { |
5298 | 1 | return Status::InvalidArgument( |
5299 | 1 | "Invalid normalization mode '{}' for function {}. " |
5300 | 1 | "Supported modes: NFC, NFD, NFKC, NFKD, NFKC_CF", |
5301 | 1 | lower_mode, get_name()); |
5302 | 1 | } |
5303 | | |
5304 | 5 | if (U_FAILURE(status) || normalizer == nullptr) { |
5305 | 0 | return Status::InvalidArgument( |
5306 | 0 | "Failed to get normalizer instance for mode '{}' in function {}: {}", |
5307 | 0 | lower_mode, get_name(), u_errorName(status)); |
5308 | 0 | } |
5309 | | |
5310 | 5 | auto state = std::make_shared<UnicodeNormalizeState>(); |
5311 | 5 | state->normalizer = normalizer; |
5312 | 5 | context->set_function_state(scope, state); |
5313 | 5 | return Status::OK(); |
5314 | 5 | } |
5315 | | |
5316 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
5317 | 5 | uint32_t result, size_t input_rows_count) const override { |
5318 | 5 | auto* state = reinterpret_cast<UnicodeNormalizeState*>( |
5319 | 5 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
5320 | 5 | if (state == nullptr || state->normalizer == nullptr) { |
5321 | 0 | return Status::RuntimeError("unicode_normalize function state is not initialized"); |
5322 | 0 | } |
5323 | | |
5324 | 5 | ColumnPtr col = |
5325 | 5 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
5326 | 5 | const auto* col_str = check_and_get_column<ColumnString>(col.get()); |
5327 | 5 | if (col_str == nullptr) { |
5328 | 0 | return Status::RuntimeError("Illegal column {} of argument of function {}", |
5329 | 0 | block.get_by_position(arguments[0]).column->get_name(), |
5330 | 0 | get_name()); |
5331 | 0 | } |
5332 | | |
5333 | 5 | const auto& data = col_str->get_chars(); |
5334 | 5 | const auto& offsets = col_str->get_offsets(); |
5335 | | |
5336 | 5 | auto res = ColumnString::create(); |
5337 | 5 | auto& res_data = res->get_chars(); |
5338 | 5 | auto& res_offsets = res->get_offsets(); |
5339 | | |
5340 | 5 | size_t rows = offsets.size(); |
5341 | 5 | res_offsets.resize(rows); |
5342 | | |
5343 | 5 | std::string tmp; |
5344 | 10 | for (size_t i = 0; i < rows; ++i) { |
5345 | 5 | const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
5346 | 5 | size_t len = offsets[i] - offsets[i - 1]; |
5347 | | |
5348 | 5 | normalize_one(state->normalizer, begin, len, tmp); |
5349 | 5 | StringOP::push_value_string(tmp, i, res_data, res_offsets); |
5350 | 5 | } |
5351 | | |
5352 | 5 | block.replace_by_position(result, std::move(res)); |
5353 | 5 | return Status::OK(); |
5354 | 5 | } |
5355 | | |
5356 | | private: |
5357 | | struct UnicodeNormalizeState { |
5358 | | const icu::Normalizer2* normalizer = nullptr; |
5359 | | }; |
5360 | | |
5361 | | static void normalize_one(const icu::Normalizer2* normalizer, const char* input, size_t length, |
5362 | 5 | std::string& output) { |
5363 | 5 | if (length == 0) { |
5364 | 0 | output.clear(); |
5365 | 0 | return; |
5366 | 0 | } |
5367 | | |
5368 | 5 | icu::StringPiece sp(input, static_cast<int32_t>(length)); |
5369 | 5 | icu::UnicodeString src16 = icu::UnicodeString::fromUTF8(sp); |
5370 | | |
5371 | 5 | UErrorCode status = U_ZERO_ERROR; |
5372 | 5 | UNormalizationCheckResult quick = normalizer->quickCheck(src16, status); |
5373 | 5 | if (U_SUCCESS(status) && quick == UNORM_YES) { |
5374 | 2 | output.assign(input, length); |
5375 | 2 | return; |
5376 | 2 | } |
5377 | | |
5378 | 3 | icu::UnicodeString result16; |
5379 | 3 | status = U_ZERO_ERROR; |
5380 | 3 | normalizer->normalize(src16, result16, status); |
5381 | 3 | if (U_FAILURE(status)) { |
5382 | 0 | output.assign(input, length); |
5383 | 0 | return; |
5384 | 0 | } |
5385 | | |
5386 | 3 | output.clear(); |
5387 | 3 | result16.toUTF8String(output); |
5388 | 3 | } |
5389 | | }; |
5390 | | |
5391 | | #include "common/compile_check_avoid_end.h" |
5392 | | } // namespace doris |