be/src/exprs/function/function_string_concat.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <fmt/format.h> |
21 | | |
22 | | #include <cstddef> |
23 | | #include <cstring> |
24 | | #include <string> |
25 | | #include <string_view> |
26 | | #include <vector> |
27 | | |
28 | | #include "common/status.h" |
29 | | #include "core/assert_cast.h" |
30 | | #include "core/block/block.h" |
31 | | #include "core/block/column_numbers.h" |
32 | | #include "core/column/column.h" |
33 | | #include "core/column/column_array.h" |
34 | | #include "core/column/column_const.h" |
35 | | #include "core/column/column_nullable.h" |
36 | | #include "core/column/column_string.h" |
37 | | #include "core/column/column_vector.h" |
38 | | #include "core/data_type/data_type_array.h" |
39 | | #include "core/data_type/data_type_nullable.h" |
40 | | #include "core/data_type/data_type_number.h" |
41 | | #include "core/data_type/data_type_string.h" |
42 | | #include "core/memcpy_small.h" |
43 | | #include "core/string_ref.h" |
44 | | #include "exec/common/stringop_substring.h" |
45 | | #include "exec/common/template_helpers.hpp" |
46 | | #include "exec/common/util.hpp" |
47 | | #include "exprs/function/function.h" |
48 | | #include "exprs/function/function_helpers.h" |
49 | | #include "exprs/function_context.h" |
50 | | #include "util/simd/vstring_function.h" |
51 | | |
52 | | namespace doris { |
53 | | #include "common/compile_check_avoid_begin.h" |
54 | | |
55 | | class FunctionStringConcat : public IFunction { |
56 | | public: |
57 | | struct ConcatState { |
58 | | bool use_state = false; |
59 | | std::string tail; |
60 | | }; |
61 | | |
62 | | static constexpr auto name = "concat"; |
63 | 365 | static FunctionPtr create() { return std::make_shared<FunctionStringConcat>(); } |
64 | 0 | String get_name() const override { return name; } |
65 | 0 | size_t get_number_of_arguments() const override { return 0; } |
66 | 364 | bool is_variadic() const override { return true; } |
67 | | |
68 | 363 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
69 | 363 | return std::make_shared<DataTypeString>(); |
70 | 363 | } |
71 | | |
72 | 727 | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
73 | 727 | if (scope == FunctionContext::THREAD_LOCAL) { |
74 | 363 | return Status::OK(); |
75 | 363 | } |
76 | 364 | std::shared_ptr<ConcatState> state = std::make_shared<ConcatState>(); |
77 | | |
78 | 364 | context->set_function_state(scope, state); |
79 | | |
80 | 364 | state->use_state = true; |
81 | | |
82 | | // Optimize function calls like this: |
83 | | // concat(col, "123", "abc", "456") -> tail = "123abc456" |
84 | 580 | for (size_t i = 1; i < context->get_num_args(); i++) { |
85 | 383 | const auto* column_string = context->get_constant_col(i); |
86 | 383 | if (column_string == nullptr) { |
87 | 139 | state->use_state = false; |
88 | 139 | return IFunction::open(context, scope); |
89 | 139 | } |
90 | 244 | auto string_vale = column_string->column_ptr->get_data_at(0); |
91 | 244 | if (string_vale.data == nullptr) { |
92 | | // For concat(col, null), it is handled by default_implementation_for_nulls |
93 | 28 | state->use_state = false; |
94 | 28 | return IFunction::open(context, scope); |
95 | 28 | } |
96 | | |
97 | 216 | state->tail.append(string_vale.begin(), string_vale.size); |
98 | 216 | } |
99 | | |
100 | | // The reserve is used here to allow the usage of memcpy_small_allow_read_write_overflow15 below. |
101 | 197 | state->tail.reserve(state->tail.size() + 16); |
102 | | |
103 | 197 | return IFunction::open(context, scope); |
104 | 364 | } |
105 | | |
106 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
107 | 292 | uint32_t result, size_t input_rows_count) const override { |
108 | 292 | DCHECK_GE(arguments.size(), 1); |
109 | | |
110 | 292 | if (arguments.size() == 1) { |
111 | 3 | block.get_by_position(result).column = block.get_by_position(arguments[0]).column; |
112 | 3 | return Status::OK(); |
113 | 3 | } |
114 | 289 | auto* concat_state = reinterpret_cast<ConcatState*>( |
115 | 289 | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
116 | 289 | if (!concat_state) { |
117 | 0 | return Status::RuntimeError("funciton context for function '{}' must have ConcatState;", |
118 | 0 | get_name()); |
119 | 0 | } |
120 | 289 | if (concat_state->use_state) { |
121 | 175 | const auto& [col, is_const] = |
122 | 175 | unpack_if_const(block.get_by_position(arguments[0]).column); |
123 | 175 | const auto* col_str = assert_cast<const ColumnString*>(col.get()); |
124 | 175 | if (is_const) { |
125 | 0 | return execute_const<true>(concat_state, block, col_str, result, input_rows_count); |
126 | 175 | } else { |
127 | 175 | return execute_const<false>(concat_state, block, col_str, result, input_rows_count); |
128 | 175 | } |
129 | | |
130 | 175 | } else { |
131 | 114 | return execute_vecotr(block, arguments, result, input_rows_count); |
132 | 114 | } |
133 | 289 | } |
134 | | |
135 | | Status execute_vecotr(Block& block, const ColumnNumbers& arguments, uint32_t result, |
136 | 114 | size_t input_rows_count) const { |
137 | 114 | int argument_size = arguments.size(); |
138 | 114 | std::vector<ColumnPtr> argument_columns(argument_size); |
139 | | |
140 | 114 | std::vector<const ColumnString::Offsets*> offsets_list(argument_size); |
141 | 114 | std::vector<const ColumnString::Chars*> chars_list(argument_size); |
142 | 114 | std::vector<bool> is_const_args(argument_size); |
143 | | |
144 | 374 | for (int i = 0; i < argument_size; ++i) { |
145 | 260 | const auto& [col, is_const] = |
146 | 260 | unpack_if_const(block.get_by_position(arguments[i]).column); |
147 | | |
148 | 260 | const auto* col_str = assert_cast<const ColumnString*>(col.get()); |
149 | 260 | offsets_list[i] = &col_str->get_offsets(); |
150 | 260 | chars_list[i] = &col_str->get_chars(); |
151 | 260 | is_const_args[i] = is_const; |
152 | 260 | } |
153 | | |
154 | 114 | auto res = ColumnString::create(); |
155 | 114 | auto& res_data = res->get_chars(); |
156 | 114 | auto& res_offset = res->get_offsets(); |
157 | | |
158 | 114 | res_offset.resize(input_rows_count); |
159 | 114 | size_t res_reserve_size = 0; |
160 | 374 | for (size_t i = 0; i < argument_size; ++i) { |
161 | 260 | if (is_const_args[i]) { |
162 | 123 | res_reserve_size += (*offsets_list[i])[0] * input_rows_count; |
163 | 137 | } else { |
164 | 137 | res_reserve_size += (*offsets_list[i])[input_rows_count - 1]; |
165 | 137 | } |
166 | 260 | } |
167 | | |
168 | 114 | ColumnString::check_chars_length(res_reserve_size, 0); |
169 | | |
170 | 114 | res_data.resize(res_reserve_size); |
171 | | |
172 | 114 | auto* data = res_data.data(); |
173 | 114 | size_t dst_offset = 0; |
174 | | |
175 | 333 | for (size_t i = 0; i < input_rows_count; ++i) { |
176 | 695 | for (size_t j = 0; j < argument_size; ++j) { |
177 | 476 | const auto& current_offsets = *offsets_list[j]; |
178 | 476 | const auto& current_chars = *chars_list[j]; |
179 | 476 | auto idx = index_check_const(i, is_const_args[j]); |
180 | 476 | const auto size = current_offsets[idx] - current_offsets[idx - 1]; |
181 | 476 | if (size > 0) { |
182 | 386 | memcpy_small_allow_read_write_overflow15( |
183 | 386 | data + dst_offset, current_chars.data() + current_offsets[idx - 1], |
184 | 386 | size); |
185 | 386 | dst_offset += size; |
186 | 386 | } |
187 | 476 | } |
188 | 219 | res_offset[i] = dst_offset; |
189 | 219 | } |
190 | | |
191 | 114 | block.get_by_position(result).column = std::move(res); |
192 | 114 | return Status::OK(); |
193 | 114 | } |
194 | | |
195 | | template <bool is_const> |
196 | | Status execute_const(ConcatState* concat_state, Block& block, const ColumnString* col_str, |
197 | 175 | uint32_t result, size_t input_rows_count) const { |
198 | | // using tail optimize |
199 | | |
200 | 175 | auto res = ColumnString::create(); |
201 | 175 | auto& res_data = res->get_chars(); |
202 | 175 | auto& res_offset = res->get_offsets(); |
203 | 175 | res_offset.resize(input_rows_count); |
204 | | |
205 | 175 | size_t res_reserve_size = 0; |
206 | 175 | if constexpr (is_const) { |
207 | 0 | res_reserve_size = col_str->get_offsets()[0] * input_rows_count; |
208 | 175 | } else { |
209 | 175 | res_reserve_size = col_str->get_offsets()[input_rows_count - 1]; |
210 | 175 | } |
211 | 175 | res_reserve_size += concat_state->tail.size() * input_rows_count; |
212 | | |
213 | 175 | ColumnString::check_chars_length(res_reserve_size, 0); |
214 | 175 | res_data.resize(res_reserve_size); |
215 | | |
216 | 175 | const auto& tail = concat_state->tail; |
217 | 175 | auto* data = res_data.data(); |
218 | 175 | size_t dst_offset = 0; |
219 | | |
220 | 352 | for (size_t i = 0; i < input_rows_count; ++i) { |
221 | 177 | const auto idx = index_check_const<is_const>(i); |
222 | 177 | StringRef str_val = col_str->get_data_at(idx); |
223 | | // copy column |
224 | 177 | memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size); |
225 | 177 | dst_offset += str_val.size; |
226 | | // copy tail |
227 | 177 | memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size()); |
228 | 177 | dst_offset += tail.size(); |
229 | 177 | res_offset[i] = dst_offset; |
230 | 177 | } |
231 | 175 | block.get_by_position(result).column = std::move(res); |
232 | 175 | return Status::OK(); |
233 | 175 | } Unexecuted instantiation: _ZNK5doris20FunctionStringConcat13execute_constILb1EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm _ZNK5doris20FunctionStringConcat13execute_constILb0EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm Line | Count | Source | 197 | 175 | uint32_t result, size_t input_rows_count) const { | 198 | | // using tail optimize | 199 | | | 200 | 175 | auto res = ColumnString::create(); | 201 | 175 | auto& res_data = res->get_chars(); | 202 | 175 | auto& res_offset = res->get_offsets(); | 203 | 175 | res_offset.resize(input_rows_count); | 204 | | | 205 | 175 | size_t res_reserve_size = 0; | 206 | | if constexpr (is_const) { | 207 | | res_reserve_size = col_str->get_offsets()[0] * input_rows_count; | 208 | 175 | } else { | 209 | 175 | res_reserve_size = col_str->get_offsets()[input_rows_count - 1]; | 210 | 175 | } | 211 | 175 | res_reserve_size += concat_state->tail.size() * input_rows_count; | 212 | | | 213 | 175 | ColumnString::check_chars_length(res_reserve_size, 0); | 214 | 175 | res_data.resize(res_reserve_size); | 215 | | | 216 | 175 | const auto& tail = concat_state->tail; | 217 | 175 | auto* data = res_data.data(); | 218 | 175 | size_t dst_offset = 0; | 219 | | | 220 | 352 | for (size_t i = 0; i < input_rows_count; ++i) { | 221 | 177 | const auto idx = index_check_const<is_const>(i); | 222 | 177 | StringRef str_val = col_str->get_data_at(idx); | 223 | | // copy column | 224 | 177 | memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size); | 225 | 177 | dst_offset += str_val.size; | 226 | | // copy tail | 227 | 177 | memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size()); | 228 | 177 | dst_offset += tail.size(); | 229 | 177 | res_offset[i] = dst_offset; | 230 | 177 | } | 231 | 175 | block.get_by_position(result).column = std::move(res); | 232 | 175 | return Status::OK(); | 233 | 175 | } |
|
234 | | }; |
235 | | |
236 | | class FunctionStringElt : public IFunction { |
237 | | public: |
238 | | static constexpr auto name = "elt"; |
239 | 360 | static FunctionPtr create() { return std::make_shared<FunctionStringElt>(); } |
240 | 0 | String get_name() const override { return name; } |
241 | 0 | size_t get_number_of_arguments() const override { return 0; } |
242 | 359 | bool is_variadic() const override { return true; } |
243 | | |
244 | 358 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
245 | 358 | return make_nullable(std::make_shared<DataTypeString>()); |
246 | 358 | } |
247 | 716 | bool use_default_implementation_for_nulls() const override { return false; } |
248 | | |
249 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
250 | 358 | uint32_t result, size_t input_rows_count) const override { |
251 | 358 | int arguent_size = arguments.size(); |
252 | 358 | int num_children = arguent_size - 1; |
253 | 358 | auto res = ColumnString::create(); |
254 | | |
255 | 358 | if (auto const_column = check_and_get_column<ColumnConst>( |
256 | 358 | *block.get_by_position(arguments[0]).column)) { |
257 | 153 | auto data = const_column->get_data_at(0); |
258 | | // return NULL, pos is null or pos < 0 or pos > num_children |
259 | 153 | auto is_null = data.data == nullptr; |
260 | 153 | auto pos = is_null ? 0 : *(Int32*)data.data; |
261 | 153 | is_null = pos <= 0 || pos > num_children; |
262 | | |
263 | 153 | auto null_map = ColumnUInt8::create(input_rows_count, is_null); |
264 | 153 | if (is_null) { |
265 | 135 | res->insert_many_defaults(input_rows_count); |
266 | 135 | } else { |
267 | 18 | auto& target_column = block.get_by_position(arguments[pos]).column; |
268 | 18 | if (auto target_const_column = check_and_get_column<ColumnConst>(*target_column)) { |
269 | 6 | auto target_data = target_const_column->get_data_at(0); |
270 | | // return NULL, no target data |
271 | 6 | if (target_data.data == nullptr) { |
272 | 0 | null_map = ColumnUInt8::create(input_rows_count, true); |
273 | 0 | res->insert_many_defaults(input_rows_count); |
274 | 6 | } else { |
275 | 6 | res->insert_data_repeatedly(target_data.data, target_data.size, |
276 | 6 | input_rows_count); |
277 | 6 | } |
278 | 12 | } else if (auto target_nullable_column = |
279 | 12 | check_and_get_column<ColumnNullable>(*target_column)) { |
280 | 12 | auto& target_null_map = target_nullable_column->get_null_map_data(); |
281 | 12 | VectorizedUtils::update_null_map( |
282 | 12 | assert_cast<ColumnUInt8&>(*null_map).get_data(), target_null_map); |
283 | | |
284 | 12 | auto& target_str_column = assert_cast<const ColumnString&>( |
285 | 12 | target_nullable_column->get_nested_column()); |
286 | 12 | res->get_chars().assign(target_str_column.get_chars().begin(), |
287 | 12 | target_str_column.get_chars().end()); |
288 | 12 | res->get_offsets().assign(target_str_column.get_offsets().begin(), |
289 | 12 | target_str_column.get_offsets().end()); |
290 | 12 | } else { |
291 | 0 | auto& target_str_column = assert_cast<const ColumnString&>(*target_column); |
292 | 0 | res->get_chars().assign(target_str_column.get_chars().begin(), |
293 | 0 | target_str_column.get_chars().end()); |
294 | 0 | res->get_offsets().assign(target_str_column.get_offsets().begin(), |
295 | 0 | target_str_column.get_offsets().end()); |
296 | 0 | } |
297 | 18 | } |
298 | 153 | block.get_by_position(result).column = |
299 | 153 | ColumnNullable::create(std::move(res), std::move(null_map)); |
300 | 205 | } else if (auto pos_null_column = check_and_get_column<ColumnNullable>( |
301 | 205 | *block.get_by_position(arguments[0]).column)) { |
302 | 205 | auto& pos_column = |
303 | 205 | assert_cast<const ColumnInt32&>(pos_null_column->get_nested_column()); |
304 | 205 | auto& pos_null_map = pos_null_column->get_null_map_data(); |
305 | 205 | auto null_map = ColumnUInt8::create(input_rows_count, false); |
306 | 205 | auto& res_null_map = assert_cast<ColumnUInt8&>(*null_map).get_data(); |
307 | | |
308 | 460 | for (size_t i = 0; i < input_rows_count; ++i) { |
309 | 255 | auto pos = pos_column.get_element(i); |
310 | 255 | res_null_map[i] = |
311 | 255 | pos_null_map[i] || pos <= 0 || pos > num_children || |
312 | 255 | block.get_by_position(arguments[pos]).column->get_data_at(i).data == |
313 | 30 | nullptr; |
314 | 255 | if (res_null_map[i]) { |
315 | 225 | res->insert_default(); |
316 | 225 | } else { |
317 | 30 | auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i); |
318 | 30 | res->insert_data(insert_data.data, insert_data.size); |
319 | 30 | } |
320 | 255 | } |
321 | 205 | block.get_by_position(result).column = |
322 | 205 | ColumnNullable::create(std::move(res), std::move(null_map)); |
323 | 205 | } else { |
324 | 0 | auto& pos_column = |
325 | 0 | assert_cast<const ColumnInt32&>(*block.get_by_position(arguments[0]).column); |
326 | 0 | auto null_map = ColumnUInt8::create(input_rows_count, false); |
327 | 0 | auto& res_null_map = assert_cast<ColumnUInt8&>(*null_map).get_data(); |
328 | |
|
329 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
330 | 0 | auto pos = pos_column.get_element(i); |
331 | 0 | res_null_map[i] = |
332 | 0 | pos <= 0 || pos > num_children || |
333 | 0 | block.get_by_position(arguments[pos]).column->get_data_at(i).data == |
334 | 0 | nullptr; |
335 | 0 | if (res_null_map[i]) { |
336 | 0 | res->insert_default(); |
337 | 0 | } else { |
338 | 0 | auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i); |
339 | 0 | res->insert_data(insert_data.data, insert_data.size); |
340 | 0 | } |
341 | 0 | } |
342 | 0 | block.get_by_position(result).column = |
343 | 0 | ColumnNullable::create(std::move(res), std::move(null_map)); |
344 | 0 | } |
345 | 358 | return Status::OK(); |
346 | 358 | } |
347 | | }; |
348 | | |
349 | | // concat_ws (string,string....) or (string, Array) |
350 | | // TODO: avoid use fmtlib |
351 | | class FunctionStringConcatWs : public IFunction { |
352 | | public: |
353 | | using Chars = ColumnString::Chars; |
354 | | using Offsets = ColumnString::Offsets; |
355 | | |
356 | | static constexpr auto name = "concat_ws"; |
357 | 451 | static FunctionPtr create() { return std::make_shared<FunctionStringConcatWs>(); } |
358 | 0 | String get_name() const override { return name; } |
359 | 0 | size_t get_number_of_arguments() const override { return 0; } |
360 | 450 | bool is_variadic() const override { return true; } |
361 | | |
362 | 449 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
363 | 449 | const IDataType* first_type = arguments[0].get(); |
364 | 449 | if (first_type->is_nullable()) { |
365 | 449 | return make_nullable(std::make_shared<DataTypeString>()); |
366 | 449 | } else { |
367 | 0 | return std::make_shared<DataTypeString>(); |
368 | 0 | } |
369 | 449 | } |
370 | 898 | bool use_default_implementation_for_nulls() const override { return false; } |
371 | | |
372 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
373 | 449 | uint32_t result, size_t input_rows_count) const override { |
374 | 449 | DCHECK_GE(arguments.size(), 2); |
375 | 449 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
376 | | // we create a zero column to simply implement |
377 | 449 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); |
378 | 449 | auto res = ColumnString::create(); |
379 | 449 | bool is_null_type = block.get_by_position(arguments[0]).type.get()->is_nullable(); |
380 | 449 | size_t argument_size = arguments.size(); |
381 | 449 | std::vector<const Offsets*> offsets_list(argument_size); |
382 | 449 | std::vector<const Chars*> chars_list(argument_size); |
383 | 449 | std::vector<const ColumnUInt8::Container*> null_list(argument_size); |
384 | | |
385 | 449 | std::vector<ColumnPtr> argument_columns(argument_size); |
386 | 449 | std::vector<ColumnPtr> argument_null_columns(argument_size); |
387 | | |
388 | 1.53k | for (size_t i = 0; i < argument_size; ++i) { |
389 | 1.08k | argument_columns[i] = |
390 | 1.08k | block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); |
391 | 1.08k | if (const auto* nullable = |
392 | 1.08k | check_and_get_column<const ColumnNullable>(*argument_columns[i])) { |
393 | | // Danger: Here must dispose the null map data first! Because |
394 | | // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem |
395 | | // of column nullable mem of null map |
396 | 1.08k | null_list[i] = &nullable->get_null_map_data(); |
397 | 1.08k | argument_null_columns[i] = nullable->get_null_map_column_ptr(); |
398 | 1.08k | argument_columns[i] = nullable->get_nested_column_ptr(); |
399 | 1.08k | } else { |
400 | 0 | null_list[i] = &const_null_map->get_data(); |
401 | 0 | } |
402 | | |
403 | 1.08k | if (is_column<ColumnArray>(argument_columns[i].get())) { |
404 | 36 | continue; |
405 | 36 | } |
406 | | |
407 | 1.05k | const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get()); |
408 | 1.05k | offsets_list[i] = &col_str->get_offsets(); |
409 | 1.05k | chars_list[i] = &col_str->get_chars(); |
410 | 1.05k | } |
411 | | |
412 | 449 | auto& res_data = res->get_chars(); |
413 | 449 | auto& res_offset = res->get_offsets(); |
414 | 449 | res_offset.resize(input_rows_count); |
415 | | |
416 | 449 | VectorizedUtils::update_null_map(null_map->get_data(), *null_list[0]); |
417 | 449 | fmt::memory_buffer buffer; |
418 | 449 | std::vector<std::string_view> views; |
419 | | |
420 | 449 | if (is_column<ColumnArray>(argument_columns[1].get())) { |
421 | | // Determine if the nested type of the array is String |
422 | 36 | const auto& array_column = reinterpret_cast<const ColumnArray&>(*argument_columns[1]); |
423 | 36 | if (!array_column.get_data().is_column_string()) { |
424 | 0 | return Status::NotSupported( |
425 | 0 | fmt::format("unsupported nested array of type {} for function {}", |
426 | 0 | is_column_nullable(array_column.get_data()) |
427 | 0 | ? array_column.get_data().get_name() |
428 | 0 | : array_column.get_data().get_name(), |
429 | 0 | get_name())); |
430 | 0 | } |
431 | | // Concat string in array |
432 | 36 | _execute_array(input_rows_count, array_column, buffer, views, offsets_list, chars_list, |
433 | 36 | null_list, res_data, res_offset); |
434 | | |
435 | 413 | } else { |
436 | | // Concat string |
437 | 413 | _execute_string(input_rows_count, argument_size, buffer, views, offsets_list, |
438 | 413 | chars_list, null_list, res_data, res_offset); |
439 | 413 | } |
440 | 449 | if (is_null_type) { |
441 | 449 | block.get_by_position(result).column = |
442 | 449 | ColumnNullable::create(std::move(res), std::move(null_map)); |
443 | 449 | } else { |
444 | 0 | block.get_by_position(result).column = std::move(res); |
445 | 0 | } |
446 | 449 | return Status::OK(); |
447 | 449 | } |
448 | | |
449 | | private: |
450 | | void _execute_array(const size_t& input_rows_count, const ColumnArray& array_column, |
451 | | fmt::memory_buffer& buffer, std::vector<std::string_view>& views, |
452 | | const std::vector<const Offsets*>& offsets_list, |
453 | | const std::vector<const Chars*>& chars_list, |
454 | | const std::vector<const ColumnUInt8::Container*>& null_list, |
455 | 36 | Chars& res_data, Offsets& res_offset) const { |
456 | | // Get array nested column |
457 | 36 | const UInt8* array_nested_null_map = nullptr; |
458 | 36 | ColumnPtr array_nested_column = nullptr; |
459 | | |
460 | 36 | if (is_column_nullable(array_column.get_data())) { |
461 | 36 | const auto& array_nested_null_column = |
462 | 36 | reinterpret_cast<const ColumnNullable&>(array_column.get_data()); |
463 | | // String's null map in array |
464 | 36 | array_nested_null_map = |
465 | 36 | array_nested_null_column.get_null_map_column().get_data().data(); |
466 | 36 | array_nested_column = array_nested_null_column.get_nested_column_ptr(); |
467 | 36 | } else { |
468 | 0 | array_nested_column = array_column.get_data_ptr(); |
469 | 0 | } |
470 | | |
471 | 36 | const auto& string_column = reinterpret_cast<const ColumnString&>(*array_nested_column); |
472 | 36 | const Chars& string_src_chars = string_column.get_chars(); |
473 | 36 | const auto& src_string_offsets = string_column.get_offsets(); |
474 | 36 | const auto& src_array_offsets = array_column.get_offsets(); |
475 | 36 | size_t current_src_array_offset = 0; |
476 | | |
477 | | // Concat string in array |
478 | 76 | for (size_t i = 0; i < input_rows_count; ++i) { |
479 | 40 | auto& sep_offsets = *offsets_list[0]; |
480 | 40 | auto& sep_chars = *chars_list[0]; |
481 | 40 | auto& sep_nullmap = *null_list[0]; |
482 | | |
483 | 40 | if (sep_nullmap[i]) { |
484 | 8 | res_offset[i] = res_data.size(); |
485 | 8 | current_src_array_offset += src_array_offsets[i] - src_array_offsets[i - 1]; |
486 | 8 | continue; |
487 | 8 | } |
488 | | |
489 | 32 | int sep_size = sep_offsets[i] - sep_offsets[i - 1]; |
490 | 32 | const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]); |
491 | | |
492 | 32 | std::string_view sep(sep_data, sep_size); |
493 | 32 | buffer.clear(); |
494 | 32 | views.clear(); |
495 | | |
496 | 32 | for (auto next_src_array_offset = src_array_offsets[i]; |
497 | 128 | current_src_array_offset < next_src_array_offset; ++current_src_array_offset) { |
498 | 96 | const auto current_src_string_offset = |
499 | 96 | current_src_array_offset ? src_string_offsets[current_src_array_offset - 1] |
500 | 96 | : 0; |
501 | 96 | size_t bytes_to_copy = |
502 | 96 | src_string_offsets[current_src_array_offset] - current_src_string_offset; |
503 | 96 | const char* ptr = |
504 | 96 | reinterpret_cast<const char*>(&string_src_chars[current_src_string_offset]); |
505 | | |
506 | 96 | if (array_nested_null_map == nullptr || |
507 | 96 | !array_nested_null_map[current_src_array_offset]) { |
508 | 96 | views.emplace_back(ptr, bytes_to_copy); |
509 | 96 | } |
510 | 96 | } |
511 | | |
512 | 32 | fmt::format_to(buffer, "{}", fmt::join(views, sep)); |
513 | | |
514 | 32 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
515 | 32 | res_offset); |
516 | 32 | } |
517 | 36 | } |
518 | | |
519 | | void _execute_string(const size_t& input_rows_count, const size_t& argument_size, |
520 | | fmt::memory_buffer& buffer, std::vector<std::string_view>& views, |
521 | | const std::vector<const Offsets*>& offsets_list, |
522 | | const std::vector<const Chars*>& chars_list, |
523 | | const std::vector<const ColumnUInt8::Container*>& null_list, |
524 | 413 | Chars& res_data, Offsets& res_offset) const { |
525 | | // Concat string |
526 | 933 | for (size_t i = 0; i < input_rows_count; ++i) { |
527 | 520 | auto& sep_offsets = *offsets_list[0]; |
528 | 520 | auto& sep_chars = *chars_list[0]; |
529 | 520 | auto& sep_nullmap = *null_list[0]; |
530 | 520 | if (sep_nullmap[i]) { |
531 | 72 | res_offset[i] = res_data.size(); |
532 | 72 | continue; |
533 | 72 | } |
534 | | |
535 | 448 | int sep_size = sep_offsets[i] - sep_offsets[i - 1]; |
536 | 448 | const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]); |
537 | | |
538 | 448 | std::string_view sep(sep_data, sep_size); |
539 | 448 | buffer.clear(); |
540 | 448 | views.clear(); |
541 | 1.04k | for (size_t j = 1; j < argument_size; ++j) { |
542 | 600 | auto& current_offsets = *offsets_list[j]; |
543 | 600 | auto& current_chars = *chars_list[j]; |
544 | 600 | auto& current_nullmap = *null_list[j]; |
545 | 600 | int size = current_offsets[i] - current_offsets[i - 1]; |
546 | 600 | const char* ptr = |
547 | 600 | reinterpret_cast<const char*>(¤t_chars[current_offsets[i - 1]]); |
548 | 600 | if (!current_nullmap[i]) { |
549 | 548 | views.emplace_back(ptr, size); |
550 | 548 | } |
551 | 600 | } |
552 | 448 | fmt::format_to(buffer, "{}", fmt::join(views, sep)); |
553 | 448 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
554 | 448 | res_offset); |
555 | 448 | } |
556 | 413 | } |
557 | | }; |
558 | | |
559 | | class FunctionStringRepeat : public IFunction { |
560 | | public: |
561 | | static constexpr auto name = "repeat"; |
562 | 183 | static FunctionPtr create() { return std::make_shared<FunctionStringRepeat>(); } |
563 | 1 | String get_name() const override { return name; } |
564 | 181 | size_t get_number_of_arguments() const override { return 2; } |
565 | | // should set NULL value of nested data to default, |
566 | | // as iff it's not inited and invalid, the repeat result of length is so large cause overflow |
567 | 163 | bool need_replace_null_data_to_default() const override { return true; } |
568 | | |
569 | 181 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
570 | 181 | return make_nullable(std::make_shared<DataTypeString>()); |
571 | 181 | } |
572 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
573 | 163 | uint32_t result, size_t input_rows_count) const override { |
574 | 163 | DCHECK_EQ(arguments.size(), 2); |
575 | 163 | auto res = ColumnString::create(); |
576 | 163 | auto null_map = ColumnUInt8::create(); |
577 | | |
578 | 163 | ColumnPtr argument_ptr[2]; |
579 | 163 | argument_ptr[0] = |
580 | 163 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
581 | 163 | argument_ptr[1] = block.get_by_position(arguments[1]).column; |
582 | | |
583 | 163 | if (const auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) { |
584 | 163 | if (const auto* col2 = check_and_get_column<ColumnInt32>(*argument_ptr[1])) { |
585 | 109 | RETURN_IF_ERROR(vector_vector(col1->get_chars(), col1->get_offsets(), |
586 | 109 | col2->get_data(), res->get_chars(), |
587 | 109 | res->get_offsets(), null_map->get_data())); |
588 | 109 | block.replace_by_position( |
589 | 109 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
590 | 109 | return Status::OK(); |
591 | 109 | } else if (const auto* col2_const = |
592 | 54 | check_and_get_column<ColumnConst>(*argument_ptr[1])) { |
593 | 54 | DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column())); |
594 | 54 | int repeat = col2_const->get_int(0); |
595 | 54 | if (repeat <= 0) { |
596 | 18 | null_map->get_data().resize_fill(input_rows_count, 0); |
597 | 18 | res->insert_many_defaults(input_rows_count); |
598 | 36 | } else { |
599 | 36 | vector_const(col1->get_chars(), col1->get_offsets(), repeat, res->get_chars(), |
600 | 36 | res->get_offsets(), null_map->get_data()); |
601 | 36 | } |
602 | 54 | block.replace_by_position( |
603 | 54 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
604 | 54 | return Status::OK(); |
605 | 54 | } |
606 | 163 | } |
607 | | |
608 | 0 | return Status::RuntimeError("repeat function get error param: {}, {}", |
609 | 0 | argument_ptr[0]->get_name(), argument_ptr[1]->get_name()); |
610 | 163 | } |
611 | | |
612 | | Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
613 | | const ColumnInt32::Container& repeats, ColumnString::Chars& res_data, |
614 | | ColumnString::Offsets& res_offsets, |
615 | 109 | ColumnUInt8::Container& null_map) const { |
616 | 109 | size_t input_row_size = offsets.size(); |
617 | | |
618 | 109 | fmt::memory_buffer buffer; |
619 | 109 | res_offsets.resize(input_row_size); |
620 | 109 | null_map.resize_fill(input_row_size, 0); |
621 | 277 | for (ssize_t i = 0; i < input_row_size; ++i) { |
622 | 168 | buffer.clear(); |
623 | 168 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
624 | 168 | size_t size = offsets[i] - offsets[i - 1]; |
625 | 168 | int repeat = repeats[i]; |
626 | 168 | if (repeat <= 0) { |
627 | 56 | StringOP::push_empty_string(i, res_data, res_offsets); |
628 | 112 | } else { |
629 | 112 | ColumnString::check_chars_length(repeat * size + res_data.size(), 0); |
630 | 644 | for (int j = 0; j < repeat; ++j) { |
631 | 532 | buffer.append(raw_str, raw_str + size); |
632 | 532 | } |
633 | 112 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, |
634 | 112 | res_data, res_offsets); |
635 | 112 | } |
636 | 168 | } |
637 | 109 | return Status::OK(); |
638 | 109 | } |
639 | | |
    // TODO: 1. Replacing the fmt::memory_buffer with a pmr::vector<char> may speed this up.
    //       2. Factor the logic shared by `vector_vector` and `vector_const` into one helper.
    //       3. Reconsider whether a limit larger than `DEFAULT_MAX_STRING_SIZE` should apply here.
643 | | void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
644 | | int repeat, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, |
645 | 36 | ColumnUInt8::Container& null_map) const { |
646 | 36 | size_t input_row_size = offsets.size(); |
647 | | |
648 | 36 | fmt::memory_buffer buffer; |
649 | 36 | res_offsets.resize(input_row_size); |
650 | 36 | null_map.resize_fill(input_row_size, 0); |
651 | 72 | for (ssize_t i = 0; i < input_row_size; ++i) { |
652 | 36 | buffer.clear(); |
653 | 36 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
654 | 36 | size_t size = offsets[i] - offsets[i - 1]; |
655 | 36 | ColumnString::check_chars_length(repeat * size + res_data.size(), 0); |
656 | | |
657 | 207 | for (int j = 0; j < repeat; ++j) { |
658 | 171 | buffer.append(raw_str, raw_str + size); |
659 | 171 | } |
660 | 36 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
661 | 36 | res_offsets); |
662 | 36 | } |
663 | 36 | } |
664 | | }; |
665 | | |
666 | | template <typename Impl> |
667 | | class FunctionStringPad : public IFunction { |
668 | | public: |
669 | | static constexpr auto name = Impl::name; |
670 | 1.37k | static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }_ZN5doris17FunctionStringPadINS_10StringLPadEE6createEv Line | Count | Source | 670 | 689 | static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE6createEv Line | Count | Source | 670 | 682 | static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); } |
|
671 | 2 | String get_name() const override { return name; }_ZNK5doris17FunctionStringPadINS_10StringLPadEE8get_nameB5cxx11Ev Line | Count | Source | 671 | 1 | String get_name() const override { return name; } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE8get_nameB5cxx11Ev Line | Count | Source | 671 | 1 | String get_name() const override { return name; } |
|
672 | 1.36k | size_t get_number_of_arguments() const override { return 3; }_ZNK5doris17FunctionStringPadINS_10StringLPadEE23get_number_of_argumentsEv Line | Count | Source | 672 | 687 | size_t get_number_of_arguments() const override { return 3; } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE23get_number_of_argumentsEv Line | Count | Source | 672 | 680 | size_t get_number_of_arguments() const override { return 3; } |
|
673 | | |
674 | 1.36k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
675 | 1.36k | return make_nullable(std::make_shared<DataTypeString>()); |
676 | 1.36k | } _ZNK5doris17FunctionStringPadINS_10StringLPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 674 | 687 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 675 | 687 | return make_nullable(std::make_shared<DataTypeString>()); | 676 | 687 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 674 | 680 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 675 | 680 | return make_nullable(std::make_shared<DataTypeString>()); | 676 | 680 | } |
|
677 | | |
678 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
679 | 870 | uint32_t result, size_t input_rows_count) const override { |
680 | 870 | DCHECK_GE(arguments.size(), 3); |
681 | 870 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
682 | | // we create a zero column to simply implement |
683 | 870 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); |
684 | 870 | auto res = ColumnString::create(); |
685 | | |
686 | 870 | ColumnPtr col[3]; |
687 | 870 | bool col_const[3]; |
688 | 3.48k | for (size_t i = 0; i < 3; ++i) { |
689 | 2.61k | std::tie(col[i], col_const[i]) = |
690 | 2.61k | unpack_if_const(block.get_by_position(arguments[i]).column); |
691 | 2.61k | } |
692 | 870 | auto& null_map_data = null_map->get_data(); |
693 | 870 | auto& res_offsets = res->get_offsets(); |
694 | 870 | auto& res_chars = res->get_chars(); |
695 | 870 | res_offsets.resize(input_rows_count); |
696 | | |
697 | 870 | const auto* strcol = assert_cast<const ColumnString*>(col[0].get()); |
698 | 870 | const auto& strcol_offsets = strcol->get_offsets(); |
699 | 870 | const auto& strcol_chars = strcol->get_chars(); |
700 | | |
701 | 870 | const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get()); |
702 | 870 | const auto& col_len_data = col_len->get_data(); |
703 | | |
704 | 870 | const auto* padcol = assert_cast<const ColumnString*>(col[2].get()); |
705 | 870 | const auto& padcol_offsets = padcol->get_offsets(); |
706 | 870 | const auto& padcol_chars = padcol->get_chars(); |
707 | 870 | std::visit( |
708 | 870 | [&](auto str_const, auto len_const, auto pad_const) { |
709 | 870 | execute_utf8<str_const, len_const, pad_const>( |
710 | 870 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, |
711 | 870 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); |
712 | 870 | }, _ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 708 | 63 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 63 | execute_utf8<str_const, len_const, pad_const>( | 710 | 63 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 63 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 63 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ _ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_ Line | Count | Source | 708 | 63 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 63 | execute_utf8<str_const, len_const, pad_const>( | 710 | 63 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 63 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 63 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_ Line | Count | Source | 708 | 62 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 62 | execute_utf8<str_const, len_const, pad_const>( | 710 | 62 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 62 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 62 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_ |
713 | 870 | make_bool_variant(col_const[0]), make_bool_variant(col_const[1]), |
714 | 870 | make_bool_variant(col_const[2])); |
715 | | |
716 | 870 | block.get_by_position(result).column = |
717 | 870 | ColumnNullable::create(std::move(res), std::move(null_map)); |
718 | 870 | return Status::OK(); |
719 | 870 | } _ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 679 | 435 | uint32_t result, size_t input_rows_count) const override { | 680 | 435 | DCHECK_GE(arguments.size(), 3); | 681 | 435 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 682 | | // we create a zero column to simply implement | 683 | 435 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); | 684 | 435 | auto res = ColumnString::create(); | 685 | | | 686 | 435 | ColumnPtr col[3]; | 687 | 435 | bool col_const[3]; | 688 | 1.74k | for (size_t i = 0; i < 3; ++i) { | 689 | 1.30k | std::tie(col[i], col_const[i]) = | 690 | 1.30k | unpack_if_const(block.get_by_position(arguments[i]).column); | 691 | 1.30k | } | 692 | 435 | auto& null_map_data = null_map->get_data(); | 693 | 435 | auto& res_offsets = res->get_offsets(); | 694 | 435 | auto& res_chars = res->get_chars(); | 695 | 435 | res_offsets.resize(input_rows_count); | 696 | | | 697 | 435 | const auto* strcol = assert_cast<const ColumnString*>(col[0].get()); | 698 | 435 | const auto& strcol_offsets = strcol->get_offsets(); | 699 | 435 | const auto& strcol_chars = strcol->get_chars(); | 700 | | | 701 | 435 | const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get()); | 702 | 435 | const auto& col_len_data = col_len->get_data(); | 703 | | | 704 | 435 | const auto* padcol = assert_cast<const ColumnString*>(col[2].get()); | 705 | 435 | const auto& padcol_offsets = padcol->get_offsets(); | 706 | 435 | const auto& padcol_chars = padcol->get_chars(); | 707 | 435 | std::visit( | 708 | 435 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 435 | execute_utf8<str_const, len_const, pad_const>( | 710 | 435 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 435 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 435 | }, | 713 | 435 | make_bool_variant(col_const[0]), 
make_bool_variant(col_const[1]), | 714 | 435 | make_bool_variant(col_const[2])); | 715 | | | 716 | 435 | block.get_by_position(result).column = | 717 | 435 | ColumnNullable::create(std::move(res), std::move(null_map)); | 718 | 435 | return Status::OK(); | 719 | 435 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 679 | 435 | uint32_t result, size_t input_rows_count) const override { | 680 | 435 | DCHECK_GE(arguments.size(), 3); | 681 | 435 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 682 | | // we create a zero column to simply implement | 683 | 435 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); | 684 | 435 | auto res = ColumnString::create(); | 685 | | | 686 | 435 | ColumnPtr col[3]; | 687 | 435 | bool col_const[3]; | 688 | 1.74k | for (size_t i = 0; i < 3; ++i) { | 689 | 1.30k | std::tie(col[i], col_const[i]) = | 690 | 1.30k | unpack_if_const(block.get_by_position(arguments[i]).column); | 691 | 1.30k | } | 692 | 435 | auto& null_map_data = null_map->get_data(); | 693 | 435 | auto& res_offsets = res->get_offsets(); | 694 | 435 | auto& res_chars = res->get_chars(); | 695 | 435 | res_offsets.resize(input_rows_count); | 696 | | | 697 | 435 | const auto* strcol = assert_cast<const ColumnString*>(col[0].get()); | 698 | 435 | const auto& strcol_offsets = strcol->get_offsets(); | 699 | 435 | const auto& strcol_chars = strcol->get_chars(); | 700 | | | 701 | 435 | const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get()); | 702 | 435 | const auto& col_len_data = col_len->get_data(); | 703 | | | 704 | 435 | const auto* padcol = assert_cast<const ColumnString*>(col[2].get()); | 705 | 435 | const auto& padcol_offsets = padcol->get_offsets(); | 706 | 435 | const auto& padcol_chars = padcol->get_chars(); | 707 | 435 | std::visit( | 708 | 435 | [&](auto str_const, auto len_const, auto pad_const) { | 709 | 435 | execute_utf8<str_const, len_const, pad_const>( | 710 | 435 | strcol_offsets, strcol_chars, col_len_data, padcol_offsets, | 711 | 435 | padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); | 712 | 435 | }, | 713 | 435 | make_bool_variant(col_const[0]), 
make_bool_variant(col_const[1]), | 714 | 435 | make_bool_variant(col_const[2])); | 715 | | | 716 | 435 | block.get_by_position(result).column = | 717 | 435 | ColumnNullable::create(std::move(res), std::move(null_map)); | 718 | 435 | return Status::OK(); | 719 | 435 | } |
|
720 | | |
721 | | template <bool str_const, bool len_const, bool pad_const> |
722 | | void execute_utf8(const ColumnString::Offsets& strcol_offsets, |
723 | | const ColumnString::Chars& strcol_chars, |
724 | | const ColumnInt32::Container& col_len_data, |
725 | | const ColumnString::Offsets& padcol_offsets, |
726 | | const ColumnString::Chars& padcol_chars, ColumnString::Offsets& res_offsets, |
727 | | ColumnString::Chars& res_chars, ColumnUInt8::Container& null_map_data, |
728 | 870 | size_t input_rows_count) const { |
729 | 870 | std::vector<size_t> pad_index; |
730 | 870 | size_t const_pad_char_size = 0; |
731 | | // If pad_const = true, initialize pad_index only once. |
732 | | // The same logic applies to the if constexpr (!pad_const) condition below. |
733 | 870 | if constexpr (pad_const) { |
734 | 372 | const_pad_char_size = simd::VStringFunctions::get_char_len( |
735 | 372 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); |
736 | 372 | } |
737 | | |
738 | 870 | fmt::memory_buffer buffer; |
739 | 870 | buffer.resize(strcol_chars.size()); |
740 | 870 | size_t buffer_len = 0; |
741 | | |
742 | 1.93k | for (size_t i = 0; i < input_rows_count; ++i) { |
743 | 1.06k | if constexpr (!pad_const) { |
744 | 691 | pad_index.clear(); |
745 | 691 | } |
746 | 1.06k | const auto len = col_len_data[index_check_const<len_const>(i)]; |
747 | 1.06k | if (len < 0) { |
748 | | // return NULL when input length is invalid number |
749 | 548 | null_map_data[i] = true; |
750 | 548 | res_offsets[i] = buffer_len; |
751 | 548 | } else { |
752 | 515 | const auto str_idx = index_check_const<str_const>(i); |
753 | 515 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; |
754 | 515 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; |
755 | 515 | const auto pad_idx = index_check_const<pad_const>(i); |
756 | 515 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; |
757 | 515 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; |
758 | | |
759 | 515 | auto [iterate_byte_len, iterate_char_len] = |
760 | 515 | simd::VStringFunctions::iterate_utf8_with_limit_length( |
761 | 515 | (const char*)str_data, (const char*)str_data + str_len, len); |
762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len |
763 | 515 | if (iterate_char_len == len) { |
764 | 471 | buffer.resize(buffer_len + iterate_byte_len); |
765 | 471 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); |
766 | 471 | buffer_len += iterate_byte_len; |
767 | 471 | res_offsets[i] = buffer_len; |
768 | 471 | continue; |
769 | 471 | } |
770 | 44 | size_t pad_char_size; |
771 | 44 | if constexpr (!pad_const) { |
772 | 32 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, |
773 | 32 | pad_len, pad_index); |
774 | 32 | } else { |
775 | 12 | pad_char_size = const_pad_char_size; |
776 | 12 | } |
777 | | |
778 | | // make compatible with mysql. return empty string if pad is empty |
779 | 44 | if (pad_char_size == 0) { |
780 | 6 | res_offsets[i] = buffer_len; |
781 | 6 | continue; |
782 | 6 | } |
783 | 38 | const size_t str_char_size = iterate_char_len; |
784 | 38 | const size_t pad_times = (len - str_char_size) / pad_char_size; |
785 | 38 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; |
786 | 38 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; |
787 | 38 | ColumnString::check_chars_length(buffer_len + new_capacity, i); |
788 | 38 | buffer.resize(buffer_len + new_capacity); |
789 | 38 | if constexpr (!Impl::is_lpad) { |
790 | 19 | memcpy(buffer.data() + buffer_len, str_data, str_len); |
791 | 19 | buffer_len += str_len; |
792 | 19 | } |
793 | | // Prepend chars of pad. |
794 | 38 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, |
795 | 38 | pad_times); |
796 | 38 | buffer_len += pad_times * pad_len; |
797 | | |
798 | 38 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); |
799 | 38 | buffer_len += pad_remainder_len; |
800 | | |
801 | 38 | if constexpr (Impl::is_lpad) { |
802 | 19 | memcpy(buffer.data() + buffer_len, str_data, str_len); |
803 | 19 | buffer_len += str_len; |
804 | 19 | } |
805 | 38 | res_offsets[i] = buffer_len; |
806 | 38 | } |
807 | 1.06k | } |
808 | 870 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); |
809 | 870 | } _ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 63 | size_t input_rows_count) const { | 729 | 63 | std::vector<size_t> pad_index; | 730 | 63 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | | if constexpr (pad_const) { | 734 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | | } | 737 | | | 738 | 63 | fmt::memory_buffer buffer; | 739 | 63 | buffer.resize(strcol_chars.size()); | 740 | 63 | size_t buffer_len = 0; | 741 | | | 742 | 223 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | 160 | if constexpr (!pad_const) { | 744 | 160 | pad_index.clear(); | 745 | 160 | } | 746 | 160 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 160 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 82 | null_map_data[i] = true; | 750 | 82 | res_offsets[i] = buffer_len; | 751 | 82 | } else { | 752 | 78 | const auto str_idx = index_check_const<str_const>(i); | 753 | 78 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 78 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 78 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 78 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 78 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 78 | auto [iterate_byte_len, iterate_char_len] = | 760 | 78 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 78 | (const char*)str_data, (const char*)str_data 
+ str_len, len); | 762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 78 | if (iterate_char_len == len) { | 764 | 68 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 68 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 68 | buffer_len += iterate_byte_len; | 767 | 68 | res_offsets[i] = buffer_len; | 768 | 68 | continue; | 769 | 68 | } | 770 | 10 | size_t pad_char_size; | 771 | 10 | if constexpr (!pad_const) { | 772 | 10 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | 10 | pad_len, pad_index); | 774 | | } else { | 775 | | pad_char_size = const_pad_char_size; | 776 | | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 10 | if (pad_char_size == 0) { | 780 | 3 | res_offsets[i] = buffer_len; | 781 | 3 | continue; | 782 | 3 | } | 783 | 7 | const size_t str_char_size = iterate_char_len; | 784 | 7 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 7 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 7 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 7 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 7 | buffer.resize(buffer_len + new_capacity); | 789 | | if constexpr (!Impl::is_lpad) { | 790 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | | buffer_len += str_len; | 792 | | } | 793 | | // Prepend chars of pad. 
| 794 | 7 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 7 | pad_times); | 796 | 7 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 7 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 7 | buffer_len += pad_remainder_len; | 800 | | | 801 | 7 | if constexpr (Impl::is_lpad) { | 802 | 7 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | 7 | buffer_len += str_len; | 804 | 7 | } | 805 | 7 | res_offsets[i] = buffer_len; | 806 | 7 | } | 807 | 160 | } | 808 | 63 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 63 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | 62 | if constexpr (pad_const) { | 734 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | 62 | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | | if constexpr (!pad_const) { | 744 | | pad_index.clear(); | 745 | | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, 
len); | 762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | | if constexpr (!pad_const) { | 772 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | | pad_len, pad_index); | 774 | 2 | } else { | 775 | 2 | pad_char_size = const_pad_char_size; | 776 | 2 | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | | if constexpr (!Impl::is_lpad) { | 790 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | | buffer_len += str_len; | 792 | | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | 2 | if constexpr (Impl::is_lpad) { | 802 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | 2 | buffer_len += str_len; | 804 | 2 | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | | if constexpr (pad_const) { | 734 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | 62 | if constexpr (!pad_const) { | 744 | 62 | pad_index.clear(); | 745 | 62 | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, len); | 
762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | 2 | if constexpr (!pad_const) { | 772 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | 2 | pad_len, pad_index); | 774 | | } else { | 775 | | pad_char_size = const_pad_char_size; | 776 | | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | | if constexpr (!Impl::is_lpad) { | 790 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | | buffer_len += str_len; | 792 | | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | 2 | if constexpr (Impl::is_lpad) { | 802 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | 2 | buffer_len += str_len; | 804 | 2 | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | 62 | if constexpr (pad_const) { | 734 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | 62 | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | | if constexpr (!pad_const) { | 744 | | pad_index.clear(); | 745 | | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, 
len); | 762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | | if constexpr (!pad_const) { | 772 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | | pad_len, pad_index); | 774 | 2 | } else { | 775 | 2 | pad_char_size = const_pad_char_size; | 776 | 2 | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | | if constexpr (!Impl::is_lpad) { | 790 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | | buffer_len += str_len; | 792 | | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | 2 | if constexpr (Impl::is_lpad) { | 802 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | 2 | buffer_len += str_len; | 804 | 2 | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | | if constexpr (pad_const) { | 734 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | 62 | if constexpr (!pad_const) { | 744 | 62 | pad_index.clear(); | 745 | 62 | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, len); | 
762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | 2 | if constexpr (!pad_const) { | 772 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | 2 | pad_len, pad_index); | 774 | | } else { | 775 | | pad_char_size = const_pad_char_size; | 776 | | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | | if constexpr (!Impl::is_lpad) { | 790 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | | buffer_len += str_len; | 792 | | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | 2 | if constexpr (Impl::is_lpad) { | 802 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | 2 | buffer_len += str_len; | 804 | 2 | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | 62 | if constexpr (pad_const) { | 734 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | 62 | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | | if constexpr (!pad_const) { | 744 | | pad_index.clear(); | 745 | | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, 
len); | 762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | | if constexpr (!pad_const) { | 772 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | | pad_len, pad_index); | 774 | 2 | } else { | 775 | 2 | pad_char_size = const_pad_char_size; | 776 | 2 | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | | if constexpr (!Impl::is_lpad) { | 790 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | | buffer_len += str_len; | 792 | | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | 2 | if constexpr (Impl::is_lpad) { | 802 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | 2 | buffer_len += str_len; | 804 | 2 | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | | if constexpr (pad_const) { | 734 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | 62 | if constexpr (!pad_const) { | 744 | 62 | pad_index.clear(); | 745 | 62 | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, len); | 
762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | 2 | if constexpr (!pad_const) { | 772 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | 2 | pad_len, pad_index); | 774 | | } else { | 775 | | pad_char_size = const_pad_char_size; | 776 | | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | | if constexpr (!Impl::is_lpad) { | 790 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | | buffer_len += str_len; | 792 | | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | 2 | if constexpr (Impl::is_lpad) { | 802 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | 2 | buffer_len += str_len; | 804 | 2 | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
Unexecuted instantiation: _ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m _ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 63 | size_t input_rows_count) const { | 729 | 63 | std::vector<size_t> pad_index; | 730 | 63 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | | if constexpr (pad_const) { | 734 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | | } | 737 | | | 738 | 63 | fmt::memory_buffer buffer; | 739 | 63 | buffer.resize(strcol_chars.size()); | 740 | 63 | size_t buffer_len = 0; | 741 | | | 742 | 222 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | 159 | if constexpr (!pad_const) { | 744 | 159 | pad_index.clear(); | 745 | 159 | } | 746 | 159 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 159 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 82 | null_map_data[i] = true; | 750 | 82 | res_offsets[i] = buffer_len; | 751 | 82 | } else { | 752 | 77 | const auto str_idx = index_check_const<str_const>(i); | 753 | 77 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 77 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 77 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 77 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 77 
| const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 77 | auto [iterate_byte_len, iterate_char_len] = | 760 | 77 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 77 | (const char*)str_data, (const char*)str_data + str_len, len); | 762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 77 | if (iterate_char_len == len) { | 764 | 67 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 67 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 67 | buffer_len += iterate_byte_len; | 767 | 67 | res_offsets[i] = buffer_len; | 768 | 67 | continue; | 769 | 67 | } | 770 | 10 | size_t pad_char_size; | 771 | 10 | if constexpr (!pad_const) { | 772 | 10 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | 10 | pad_len, pad_index); | 774 | | } else { | 775 | | pad_char_size = const_pad_char_size; | 776 | | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 10 | if (pad_char_size == 0) { | 780 | 3 | res_offsets[i] = buffer_len; | 781 | 3 | continue; | 782 | 3 | } | 783 | 7 | const size_t str_char_size = iterate_char_len; | 784 | 7 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 7 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 7 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 7 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 7 | buffer.resize(buffer_len + new_capacity); | 789 | 7 | if constexpr (!Impl::is_lpad) { | 790 | 7 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | 7 | buffer_len += str_len; | 792 | 7 | } | 793 | | // Prepend chars of pad. 
| 794 | 7 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 7 | pad_times); | 796 | 7 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 7 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 7 | buffer_len += pad_remainder_len; | 800 | | | 801 | | if constexpr (Impl::is_lpad) { | 802 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | | buffer_len += str_len; | 804 | | } | 805 | 7 | res_offsets[i] = buffer_len; | 806 | 7 | } | 807 | 159 | } | 808 | 63 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 63 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | 62 | if constexpr (pad_const) { | 734 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | 62 | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | | if constexpr (!pad_const) { | 744 | | pad_index.clear(); | 745 | | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, 
len); | 762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | | if constexpr (!pad_const) { | 772 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | | pad_len, pad_index); | 774 | 2 | } else { | 775 | 2 | pad_char_size = const_pad_char_size; | 776 | 2 | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | 2 | if constexpr (!Impl::is_lpad) { | 790 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | 2 | buffer_len += str_len; | 792 | 2 | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | | if constexpr (Impl::is_lpad) { | 802 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | | buffer_len += str_len; | 804 | | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | | if constexpr (pad_const) { | 734 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | 62 | if constexpr (!pad_const) { | 744 | 62 | pad_index.clear(); | 745 | 62 | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, len); | 
762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | 2 | if constexpr (!pad_const) { | 772 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | 2 | pad_len, pad_index); | 774 | | } else { | 775 | | pad_char_size = const_pad_char_size; | 776 | | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | 2 | if constexpr (!Impl::is_lpad) { | 790 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | 2 | buffer_len += str_len; | 792 | 2 | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | | if constexpr (Impl::is_lpad) { | 802 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | | buffer_len += str_len; | 804 | | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | 62 | if constexpr (pad_const) { | 734 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | 62 | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | | if constexpr (!pad_const) { | 744 | | pad_index.clear(); | 745 | | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, 
len); | 762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | | if constexpr (!pad_const) { | 772 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | | pad_len, pad_index); | 774 | 2 | } else { | 775 | 2 | pad_char_size = const_pad_char_size; | 776 | 2 | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | 2 | if constexpr (!Impl::is_lpad) { | 790 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | 2 | buffer_len += str_len; | 792 | 2 | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | | if constexpr (Impl::is_lpad) { | 802 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | | buffer_len += str_len; | 804 | | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | | if constexpr (pad_const) { | 734 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | 62 | if constexpr (!pad_const) { | 744 | 62 | pad_index.clear(); | 745 | 62 | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, len); | 
762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | 2 | if constexpr (!pad_const) { | 772 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | 2 | pad_len, pad_index); | 774 | | } else { | 775 | | pad_char_size = const_pad_char_size; | 776 | | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | 2 | if constexpr (!Impl::is_lpad) { | 790 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | 2 | buffer_len += str_len; | 792 | 2 | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | | if constexpr (Impl::is_lpad) { | 802 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | | buffer_len += str_len; | 804 | | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | 62 | if constexpr (pad_const) { | 734 | 62 | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | 62 | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | 62 | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | | if constexpr (!pad_const) { | 744 | | pad_index.clear(); | 745 | | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, 
len); | 762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | | if constexpr (!pad_const) { | 772 | | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | | pad_len, pad_index); | 774 | 2 | } else { | 775 | 2 | pad_char_size = const_pad_char_size; | 776 | 2 | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | 2 | if constexpr (!Impl::is_lpad) { | 790 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | 2 | buffer_len += str_len; | 792 | 2 | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | | if constexpr (Impl::is_lpad) { | 802 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | | buffer_len += str_len; | 804 | | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m Line | Count | Source | 728 | 62 | size_t input_rows_count) const { | 729 | 62 | std::vector<size_t> pad_index; | 730 | 62 | size_t const_pad_char_size = 0; | 731 | | // If pad_const = true, initialize pad_index only once. | 732 | | // The same logic applies to the if constexpr (!pad_const) condition below. | 733 | | if constexpr (pad_const) { | 734 | | const_pad_char_size = simd::VStringFunctions::get_char_len( | 735 | | (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); | 736 | | } | 737 | | | 738 | 62 | fmt::memory_buffer buffer; | 739 | 62 | buffer.resize(strcol_chars.size()); | 740 | 62 | size_t buffer_len = 0; | 741 | | | 742 | 124 | for (size_t i = 0; i < input_rows_count; ++i) { | 743 | 62 | if constexpr (!pad_const) { | 744 | 62 | pad_index.clear(); | 745 | 62 | } | 746 | 62 | const auto len = col_len_data[index_check_const<len_const>(i)]; | 747 | 62 | if (len < 0) { | 748 | | // return NULL when input length is invalid number | 749 | 32 | null_map_data[i] = true; | 750 | 32 | res_offsets[i] = buffer_len; | 751 | 32 | } else { | 752 | 30 | const auto str_idx = index_check_const<str_const>(i); | 753 | 30 | const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; | 754 | 30 | const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; | 755 | 30 | const auto pad_idx = index_check_const<pad_const>(i); | 756 | 30 | const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; | 757 | 30 | const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; | 758 | | | 759 | 30 | auto [iterate_byte_len, iterate_char_len] = | 760 | 30 | simd::VStringFunctions::iterate_utf8_with_limit_length( | 761 | 30 | (const char*)str_data, (const char*)str_data + str_len, len); | 
762 | | // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len | 763 | 30 | if (iterate_char_len == len) { | 764 | 28 | buffer.resize(buffer_len + iterate_byte_len); | 765 | 28 | memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); | 766 | 28 | buffer_len += iterate_byte_len; | 767 | 28 | res_offsets[i] = buffer_len; | 768 | 28 | continue; | 769 | 28 | } | 770 | 2 | size_t pad_char_size; | 771 | 2 | if constexpr (!pad_const) { | 772 | 2 | pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, | 773 | 2 | pad_len, pad_index); | 774 | | } else { | 775 | | pad_char_size = const_pad_char_size; | 776 | | } | 777 | | | 778 | | // make compatible with mysql. return empty string if pad is empty | 779 | 2 | if (pad_char_size == 0) { | 780 | 0 | res_offsets[i] = buffer_len; | 781 | 0 | continue; | 782 | 0 | } | 783 | 2 | const size_t str_char_size = iterate_char_len; | 784 | 2 | const size_t pad_times = (len - str_char_size) / pad_char_size; | 785 | 2 | const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; | 786 | 2 | const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; | 787 | 2 | ColumnString::check_chars_length(buffer_len + new_capacity, i); | 788 | 2 | buffer.resize(buffer_len + new_capacity); | 789 | 2 | if constexpr (!Impl::is_lpad) { | 790 | 2 | memcpy(buffer.data() + buffer_len, str_data, str_len); | 791 | 2 | buffer_len += str_len; | 792 | 2 | } | 793 | | // Prepend chars of pad. 
| 794 | 2 | StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len, | 795 | 2 | pad_times); | 796 | 2 | buffer_len += pad_times * pad_len; | 797 | | | 798 | 2 | memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); | 799 | 2 | buffer_len += pad_remainder_len; | 800 | | | 801 | | if constexpr (Impl::is_lpad) { | 802 | | memcpy(buffer.data() + buffer_len, str_data, str_len); | 803 | | buffer_len += str_len; | 804 | | } | 805 | 2 | res_offsets[i] = buffer_len; | 806 | 2 | } | 807 | 62 | } | 808 | 62 | res_chars.insert(buffer.data(), buffer.data() + buffer_len); | 809 | 62 | } |
Unexecuted instantiation: _ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m |
810 | | }; |
811 | | |
812 | | #include "common/compile_check_avoid_end.h" |
813 | | } // namespace doris |