be/src/exprs/function/function_string_concat.h
Line | Count | Source |
1 | | // Licensed to the Apache Software Foundation (ASF) under one |
2 | | // or more contributor license agreements. See the NOTICE file |
3 | | // distributed with this work for additional information |
4 | | // regarding copyright ownership. The ASF licenses this file |
5 | | // to you under the Apache License, Version 2.0 (the |
6 | | // "License"); you may not use this file except in compliance |
7 | | // with the License. You may obtain a copy of the License at |
8 | | // |
9 | | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | | // |
11 | | // Unless required by applicable law or agreed to in writing, |
12 | | // software distributed under the License is distributed on an |
13 | | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | | // KIND, either express or implied. See the License for the |
15 | | // specific language governing permissions and limitations |
16 | | // under the License. |
17 | | |
18 | | #pragma once |
19 | | |
20 | | #include <fmt/format.h> |
21 | | |
22 | | #include <cstddef> |
23 | | #include <cstring> |
24 | | #include <string> |
25 | | #include <string_view> |
26 | | #include <vector> |
27 | | |
28 | | #include "common/status.h" |
29 | | #include "core/assert_cast.h" |
30 | | #include "core/block/block.h" |
31 | | #include "core/block/column_numbers.h" |
32 | | #include "core/column/column.h" |
33 | | #include "core/column/column_array.h" |
34 | | #include "core/column/column_const.h" |
35 | | #include "core/column/column_nullable.h" |
36 | | #include "core/column/column_string.h" |
37 | | #include "core/column/column_vector.h" |
38 | | #include "core/data_type/data_type_array.h" |
39 | | #include "core/data_type/data_type_nullable.h" |
40 | | #include "core/data_type/data_type_number.h" |
41 | | #include "core/data_type/data_type_string.h" |
42 | | #include "core/memcpy_small.h" |
43 | | #include "core/string_ref.h" |
44 | | #include "exec/common/stringop_substring.h" |
45 | | #include "exec/common/template_helpers.hpp" |
46 | | #include "exec/common/util.hpp" |
47 | | #include "exprs/function/function.h" |
48 | | #include "exprs/function/function_helpers.h" |
49 | | #include "exprs/function_context.h" |
50 | | #include "util/simd/vstring_function.h" |
51 | | |
52 | | namespace doris { |
53 | | #include "common/compile_check_avoid_begin.h" |
54 | | |
55 | | class FunctionStringConcat : public IFunction { |
56 | | public: |
57 | | struct ConcatState { |
58 | | bool use_state = false; |
59 | | std::string tail; |
60 | | }; |
61 | | |
62 | | static constexpr auto name = "concat"; |
63 | 2.50k | static FunctionPtr create() { return std::make_shared<FunctionStringConcat>(); } |
64 | 0 | String get_name() const override { return name; } |
65 | 0 | size_t get_number_of_arguments() const override { return 0; } |
66 | 2.49k | bool is_variadic() const override { return true; } |
67 | | |
68 | 2.49k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
69 | 2.49k | return std::make_shared<DataTypeString>(); |
70 | 2.49k | } |
71 | | |
72 | 7.19k | Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { |
73 | 7.19k | if (scope == FunctionContext::THREAD_LOCAL) { |
74 | 4.70k | return Status::OK(); |
75 | 4.70k | } |
76 | 2.49k | std::shared_ptr<ConcatState> state = std::make_shared<ConcatState>(); |
77 | | |
78 | 2.49k | context->set_function_state(scope, state); |
79 | | |
80 | 2.49k | state->use_state = true; |
81 | | |
82 | | // Optimize function calls like this: |
83 | | // concat(col, "123", "abc", "456") -> tail = "123abc456" |
84 | 4.79k | for (size_t i = 1; i < context->get_num_args(); i++) { |
85 | 4.58k | const auto* column_string = context->get_constant_col(i); |
86 | 4.58k | if (column_string == nullptr) { |
87 | 2.25k | state->use_state = false; |
88 | 2.25k | return IFunction::open(context, scope); |
89 | 2.25k | } |
90 | 2.33k | auto string_vale = column_string->column_ptr->get_data_at(0); |
91 | 2.33k | if (string_vale.data == nullptr) { |
92 | | // For concat(col, null), it is handled by default_implementation_for_nulls |
93 | 28 | state->use_state = false; |
94 | 28 | return IFunction::open(context, scope); |
95 | 28 | } |
96 | | |
97 | 2.30k | state->tail.append(string_vale.begin(), string_vale.size); |
98 | 2.30k | } |
99 | | |
100 | | // The reserve is used here to allow the usage of memcpy_small_allow_read_write_overflow15 below. |
101 | 209 | state->tail.reserve(state->tail.size() + 16); |
102 | | |
103 | 209 | return IFunction::open(context, scope); |
104 | 2.49k | } |
105 | | |
106 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
107 | 2.37k | uint32_t result, size_t input_rows_count) const override { |
108 | 2.37k | DCHECK_GE(arguments.size(), 1); |
109 | | |
110 | 2.37k | if (arguments.size() == 1) { |
111 | 3 | block.get_by_position(result).column = block.get_by_position(arguments[0]).column; |
112 | 3 | return Status::OK(); |
113 | 3 | } |
114 | 2.37k | auto* concat_state = reinterpret_cast<ConcatState*>( |
115 | 2.37k | context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); |
116 | 2.37k | if (!concat_state) { |
117 | 0 | return Status::RuntimeError("funciton context for function '{}' must have ConcatState;", |
118 | 0 | get_name()); |
119 | 0 | } |
120 | 2.37k | if (concat_state->use_state) { |
121 | 187 | const auto& [col, is_const] = |
122 | 187 | unpack_if_const(block.get_by_position(arguments[0]).column); |
123 | 187 | const auto* col_str = assert_cast<const ColumnString*>(col.get()); |
124 | 187 | if (is_const) { |
125 | 0 | return execute_const<true>(concat_state, block, col_str, result, input_rows_count); |
126 | 187 | } else { |
127 | 187 | return execute_const<false>(concat_state, block, col_str, result, input_rows_count); |
128 | 187 | } |
129 | | |
130 | 2.18k | } else { |
131 | 2.18k | return execute_vecotr(block, arguments, result, input_rows_count); |
132 | 2.18k | } |
133 | 2.37k | } |
134 | | |
135 | | Status execute_vecotr(Block& block, const ColumnNumbers& arguments, uint32_t result, |
136 | 2.18k | size_t input_rows_count) const { |
137 | 2.18k | int argument_size = arguments.size(); |
138 | 2.18k | std::vector<ColumnPtr> argument_columns(argument_size); |
139 | | |
140 | 2.18k | std::vector<const ColumnString::Offsets*> offsets_list(argument_size); |
141 | 2.18k | std::vector<const ColumnString::Chars*> chars_list(argument_size); |
142 | 2.18k | std::vector<bool> is_const_args(argument_size); |
143 | | |
144 | 8.63k | for (int i = 0; i < argument_size; ++i) { |
145 | 6.44k | const auto& [col, is_const] = |
146 | 6.44k | unpack_if_const(block.get_by_position(arguments[i]).column); |
147 | | |
148 | 6.44k | const auto* col_str = assert_cast<const ColumnString*>(col.get()); |
149 | 6.44k | offsets_list[i] = &col_str->get_offsets(); |
150 | 6.44k | chars_list[i] = &col_str->get_chars(); |
151 | 6.44k | is_const_args[i] = is_const; |
152 | 6.44k | } |
153 | | |
154 | 2.18k | auto res = ColumnString::create(); |
155 | 2.18k | auto& res_data = res->get_chars(); |
156 | 2.18k | auto& res_offset = res->get_offsets(); |
157 | | |
158 | 2.18k | res_offset.resize(input_rows_count); |
159 | 2.18k | size_t res_reserve_size = 0; |
160 | 8.63k | for (size_t i = 0; i < argument_size; ++i) { |
161 | 6.44k | if (is_const_args[i]) { |
162 | 2.18k | res_reserve_size += (*offsets_list[i])[0] * input_rows_count; |
163 | 4.26k | } else { |
164 | 4.26k | res_reserve_size += (*offsets_list[i])[input_rows_count - 1]; |
165 | 4.26k | } |
166 | 6.44k | } |
167 | | |
168 | 2.18k | ColumnString::check_chars_length(res_reserve_size, 0); |
169 | | |
170 | 2.18k | res_data.resize(res_reserve_size); |
171 | | |
172 | 2.18k | auto* data = res_data.data(); |
173 | 2.18k | size_t dst_offset = 0; |
174 | | |
175 | 72.3k | for (size_t i = 0; i < input_rows_count; ++i) { |
176 | 218k | for (size_t j = 0; j < argument_size; ++j) { |
177 | 148k | const auto& current_offsets = *offsets_list[j]; |
178 | 148k | const auto& current_chars = *chars_list[j]; |
179 | 148k | auto idx = index_check_const(i, is_const_args[j]); |
180 | 148k | const auto size = current_offsets[idx] - current_offsets[idx - 1]; |
181 | 148k | if (size > 0) { |
182 | 148k | memcpy_small_allow_read_write_overflow15( |
183 | 148k | data + dst_offset, current_chars.data() + current_offsets[idx - 1], |
184 | 148k | size); |
185 | 148k | dst_offset += size; |
186 | 148k | } |
187 | 148k | } |
188 | 70.1k | res_offset[i] = dst_offset; |
189 | 70.1k | } |
190 | | |
191 | 2.18k | block.get_by_position(result).column = std::move(res); |
192 | 2.18k | return Status::OK(); |
193 | 2.18k | } |
194 | | |
195 | | template <bool is_const> |
196 | | Status execute_const(ConcatState* concat_state, Block& block, const ColumnString* col_str, |
197 | 187 | uint32_t result, size_t input_rows_count) const { |
198 | | // using tail optimize |
199 | | |
200 | 187 | auto res = ColumnString::create(); |
201 | 187 | auto& res_data = res->get_chars(); |
202 | 187 | auto& res_offset = res->get_offsets(); |
203 | 187 | res_offset.resize(input_rows_count); |
204 | | |
205 | 187 | size_t res_reserve_size = 0; |
206 | 187 | if constexpr (is_const) { |
207 | 0 | res_reserve_size = col_str->get_offsets()[0] * input_rows_count; |
208 | 187 | } else { |
209 | 187 | res_reserve_size = col_str->get_offsets()[input_rows_count - 1]; |
210 | 187 | } |
211 | 187 | res_reserve_size += concat_state->tail.size() * input_rows_count; |
212 | | |
213 | 187 | ColumnString::check_chars_length(res_reserve_size, 0); |
214 | 187 | res_data.resize(res_reserve_size); |
215 | | |
216 | 187 | const auto& tail = concat_state->tail; |
217 | 187 | auto* data = res_data.data(); |
218 | 187 | size_t dst_offset = 0; |
219 | | |
220 | 394 | for (size_t i = 0; i < input_rows_count; ++i) { |
221 | 207 | const auto idx = index_check_const<is_const>(i); |
222 | 207 | StringRef str_val = col_str->get_data_at(idx); |
223 | | // copy column |
224 | 207 | memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size); |
225 | 207 | dst_offset += str_val.size; |
226 | | // copy tail |
227 | 207 | memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size()); |
228 | 207 | dst_offset += tail.size(); |
229 | 207 | res_offset[i] = dst_offset; |
230 | 207 | } |
231 | 187 | block.get_by_position(result).column = std::move(res); |
232 | 187 | return Status::OK(); |
233 | 187 | } Unexecuted instantiation: _ZNK5doris20FunctionStringConcat13execute_constILb1EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm _ZNK5doris20FunctionStringConcat13execute_constILb0EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm Line | Count | Source | 197 | 187 | uint32_t result, size_t input_rows_count) const { | 198 | | // using tail optimize | 199 | | | 200 | 187 | auto res = ColumnString::create(); | 201 | 187 | auto& res_data = res->get_chars(); | 202 | 187 | auto& res_offset = res->get_offsets(); | 203 | 187 | res_offset.resize(input_rows_count); | 204 | | | 205 | 187 | size_t res_reserve_size = 0; | 206 | | if constexpr (is_const) { | 207 | | res_reserve_size = col_str->get_offsets()[0] * input_rows_count; | 208 | 187 | } else { | 209 | 187 | res_reserve_size = col_str->get_offsets()[input_rows_count - 1]; | 210 | 187 | } | 211 | 187 | res_reserve_size += concat_state->tail.size() * input_rows_count; | 212 | | | 213 | 187 | ColumnString::check_chars_length(res_reserve_size, 0); | 214 | 187 | res_data.resize(res_reserve_size); | 215 | | | 216 | 187 | const auto& tail = concat_state->tail; | 217 | 187 | auto* data = res_data.data(); | 218 | 187 | size_t dst_offset = 0; | 219 | | | 220 | 394 | for (size_t i = 0; i < input_rows_count; ++i) { | 221 | 207 | const auto idx = index_check_const<is_const>(i); | 222 | 207 | StringRef str_val = col_str->get_data_at(idx); | 223 | | // copy column | 224 | 207 | memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size); | 225 | 207 | dst_offset += str_val.size; | 226 | | // copy tail | 227 | 207 | memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size()); | 228 | 207 | dst_offset += tail.size(); | 229 | 207 | res_offset[i] = dst_offset; | 230 | 207 | } | 231 | 187 | block.get_by_position(result).column = std::move(res); | 232 | 187 | return Status::OK(); | 233 | 187 | } |
|
234 | | }; |
235 | | |
236 | | class FunctionStringElt : public IFunction { |
237 | | public: |
238 | | static constexpr auto name = "elt"; |
239 | 366 | static FunctionPtr create() { return std::make_shared<FunctionStringElt>(); } |
240 | 0 | String get_name() const override { return name; } |
241 | 0 | size_t get_number_of_arguments() const override { return 0; } |
242 | 359 | bool is_variadic() const override { return true; } |
243 | | |
244 | 358 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
245 | 358 | return make_nullable(std::make_shared<DataTypeString>()); |
246 | 358 | } |
247 | 716 | bool use_default_implementation_for_nulls() const override { return false; } |
248 | | |
249 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
250 | 358 | uint32_t result, size_t input_rows_count) const override { |
251 | 358 | int arguent_size = arguments.size(); |
252 | 358 | int num_children = arguent_size - 1; |
253 | 358 | auto res = ColumnString::create(); |
254 | | |
255 | 358 | if (auto const_column = check_and_get_column<ColumnConst>( |
256 | 358 | *block.get_by_position(arguments[0]).column)) { |
257 | 153 | auto data = const_column->get_data_at(0); |
258 | | // return NULL, pos is null or pos < 0 or pos > num_children |
259 | 153 | auto is_null = data.data == nullptr; |
260 | 153 | auto pos = is_null ? 0 : *(Int32*)data.data; |
261 | 153 | is_null = pos <= 0 || pos > num_children; |
262 | | |
263 | 153 | auto null_map = ColumnUInt8::create(input_rows_count, is_null); |
264 | 153 | if (is_null) { |
265 | 135 | res->insert_many_defaults(input_rows_count); |
266 | 135 | } else { |
267 | 18 | auto& target_column = block.get_by_position(arguments[pos]).column; |
268 | 18 | if (auto target_const_column = check_and_get_column<ColumnConst>(*target_column)) { |
269 | 6 | auto target_data = target_const_column->get_data_at(0); |
270 | | // return NULL, no target data |
271 | 6 | if (target_data.data == nullptr) { |
272 | 0 | null_map = ColumnUInt8::create(input_rows_count, true); |
273 | 0 | res->insert_many_defaults(input_rows_count); |
274 | 6 | } else { |
275 | 6 | res->insert_data_repeatedly(target_data.data, target_data.size, |
276 | 6 | input_rows_count); |
277 | 6 | } |
278 | 12 | } else if (auto target_nullable_column = |
279 | 12 | check_and_get_column<ColumnNullable>(*target_column)) { |
280 | 12 | auto& target_null_map = target_nullable_column->get_null_map_data(); |
281 | 12 | VectorizedUtils::update_null_map(null_map->get_data(), target_null_map); |
282 | | |
283 | 12 | auto& target_str_column = assert_cast<const ColumnString&>( |
284 | 12 | target_nullable_column->get_nested_column()); |
285 | 12 | res->get_chars().assign(target_str_column.get_chars().begin(), |
286 | 12 | target_str_column.get_chars().end()); |
287 | 12 | res->get_offsets().assign(target_str_column.get_offsets().begin(), |
288 | 12 | target_str_column.get_offsets().end()); |
289 | 12 | } else { |
290 | 0 | auto& target_str_column = assert_cast<const ColumnString&>(*target_column); |
291 | 0 | res->get_chars().assign(target_str_column.get_chars().begin(), |
292 | 0 | target_str_column.get_chars().end()); |
293 | 0 | res->get_offsets().assign(target_str_column.get_offsets().begin(), |
294 | 0 | target_str_column.get_offsets().end()); |
295 | 0 | } |
296 | 18 | } |
297 | 153 | block.get_by_position(result).column = |
298 | 153 | ColumnNullable::create(std::move(res), std::move(null_map)); |
299 | 205 | } else if (auto pos_null_column = check_and_get_column<ColumnNullable>( |
300 | 205 | *block.get_by_position(arguments[0]).column)) { |
301 | 205 | auto& pos_column = |
302 | 205 | assert_cast<const ColumnInt32&>(pos_null_column->get_nested_column()); |
303 | 205 | auto& pos_null_map = pos_null_column->get_null_map_data(); |
304 | 205 | auto null_map = ColumnUInt8::create(input_rows_count, false); |
305 | 205 | auto& res_null_map = null_map->get_data(); |
306 | | |
307 | 460 | for (size_t i = 0; i < input_rows_count; ++i) { |
308 | 255 | auto pos = pos_column.get_element(i); |
309 | 255 | res_null_map[i] = |
310 | 255 | pos_null_map[i] || pos <= 0 || pos > num_children || |
311 | 255 | block.get_by_position(arguments[pos]).column->get_data_at(i).data == |
312 | 30 | nullptr; |
313 | 255 | if (res_null_map[i]) { |
314 | 225 | res->insert_default(); |
315 | 225 | } else { |
316 | 30 | auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i); |
317 | 30 | res->insert_data(insert_data.data, insert_data.size); |
318 | 30 | } |
319 | 255 | } |
320 | 205 | block.get_by_position(result).column = |
321 | 205 | ColumnNullable::create(std::move(res), std::move(null_map)); |
322 | 205 | } else { |
323 | 0 | auto& pos_column = |
324 | 0 | assert_cast<const ColumnInt32&>(*block.get_by_position(arguments[0]).column); |
325 | 0 | auto null_map = ColumnUInt8::create(input_rows_count, false); |
326 | 0 | auto& res_null_map = null_map->get_data(); |
327 | |
|
328 | 0 | for (size_t i = 0; i < input_rows_count; ++i) { |
329 | 0 | auto pos = pos_column.get_element(i); |
330 | 0 | res_null_map[i] = |
331 | 0 | pos <= 0 || pos > num_children || |
332 | 0 | block.get_by_position(arguments[pos]).column->get_data_at(i).data == |
333 | 0 | nullptr; |
334 | 0 | if (res_null_map[i]) { |
335 | 0 | res->insert_default(); |
336 | 0 | } else { |
337 | 0 | auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i); |
338 | 0 | res->insert_data(insert_data.data, insert_data.size); |
339 | 0 | } |
340 | 0 | } |
341 | 0 | block.get_by_position(result).column = |
342 | 0 | ColumnNullable::create(std::move(res), std::move(null_map)); |
343 | 0 | } |
344 | 358 | return Status::OK(); |
345 | 358 | } |
346 | | }; |
347 | | |
348 | | // concat_ws (string,string....) or (string, Array) |
349 | | // TODO: avoid use fmtlib |
350 | | class FunctionStringConcatWs : public IFunction { |
351 | | public: |
352 | | using Chars = ColumnString::Chars; |
353 | | using Offsets = ColumnString::Offsets; |
354 | | |
355 | | static constexpr auto name = "concat_ws"; |
356 | 459 | static FunctionPtr create() { return std::make_shared<FunctionStringConcatWs>(); } |
357 | 0 | String get_name() const override { return name; } |
358 | 0 | size_t get_number_of_arguments() const override { return 0; } |
359 | 452 | bool is_variadic() const override { return true; } |
360 | | |
361 | 451 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
362 | 451 | const IDataType* first_type = arguments[0].get(); |
363 | 451 | if (first_type->is_nullable()) { |
364 | 449 | return make_nullable(std::make_shared<DataTypeString>()); |
365 | 449 | } else { |
366 | 2 | return std::make_shared<DataTypeString>(); |
367 | 2 | } |
368 | 451 | } |
369 | 902 | bool use_default_implementation_for_nulls() const override { return false; } |
370 | | |
371 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
372 | 451 | uint32_t result, size_t input_rows_count) const override { |
373 | 451 | DCHECK_GE(arguments.size(), 2); |
374 | 451 | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
375 | | // we create a zero column to simply implement |
376 | 451 | auto const_null_map = ColumnUInt8::create(input_rows_count, 0); |
377 | 451 | auto res = ColumnString::create(); |
378 | 451 | bool is_null_type = block.get_by_position(arguments[0]).type.get()->is_nullable(); |
379 | 451 | size_t argument_size = arguments.size(); |
380 | 451 | std::vector<const Offsets*> offsets_list(argument_size); |
381 | 451 | std::vector<const Chars*> chars_list(argument_size); |
382 | 451 | std::vector<const ColumnUInt8::Container*> null_list(argument_size); |
383 | | |
384 | 451 | std::vector<ColumnPtr> argument_columns(argument_size); |
385 | 451 | std::vector<ColumnPtr> argument_null_columns(argument_size); |
386 | | |
387 | 1.54k | for (size_t i = 0; i < argument_size; ++i) { |
388 | 1.09k | argument_columns[i] = |
389 | 1.09k | block.get_by_position(arguments[i]).column->convert_to_full_column_if_const(); |
390 | 1.09k | if (const auto* nullable = |
391 | 1.09k | check_and_get_column<const ColumnNullable>(*argument_columns[i])) { |
392 | | // Danger: Here must dispose the null map data first! Because |
393 | | // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem |
394 | | // of column nullable mem of null map |
395 | 1.08k | null_list[i] = &nullable->get_null_map_data(); |
396 | 1.08k | argument_null_columns[i] = nullable->get_null_map_column_ptr(); |
397 | 1.08k | argument_columns[i] = nullable->get_nested_column_ptr(); |
398 | 1.08k | } else { |
399 | 6 | null_list[i] = &const_null_map->get_data(); |
400 | 6 | } |
401 | | |
402 | 1.09k | if (is_column<ColumnArray>(argument_columns[i].get())) { |
403 | 36 | continue; |
404 | 36 | } |
405 | | |
406 | 1.05k | const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get()); |
407 | 1.05k | offsets_list[i] = &col_str->get_offsets(); |
408 | 1.05k | chars_list[i] = &col_str->get_chars(); |
409 | 1.05k | } |
410 | | |
411 | 451 | auto& res_data = res->get_chars(); |
412 | 451 | auto& res_offset = res->get_offsets(); |
413 | 451 | res_offset.resize(input_rows_count); |
414 | | |
415 | 451 | VectorizedUtils::update_null_map(null_map->get_data(), *null_list[0]); |
416 | 451 | fmt::memory_buffer buffer; |
417 | 451 | std::vector<std::string_view> views; |
418 | | |
419 | 451 | if (is_column<ColumnArray>(argument_columns[1].get())) { |
420 | 36 | if (argument_size != 2) { |
421 | 0 | return Status::InvalidArgument( |
422 | 0 | "concat_ws with array argument expects exactly 2 arguments, but got {}", |
423 | 0 | argument_size); |
424 | 0 | } |
425 | | // Determine if the nested type of the array is String |
426 | 36 | const auto& array_column = reinterpret_cast<const ColumnArray&>(*argument_columns[1]); |
427 | 36 | if (!array_column.get_data().is_column_string()) { |
428 | 0 | return Status::NotSupported( |
429 | 0 | fmt::format("unsupported nested array of type {} for function {}", |
430 | 0 | is_column_nullable(array_column.get_data()) |
431 | 0 | ? array_column.get_data().get_name() |
432 | 0 | : array_column.get_data().get_name(), |
433 | 0 | get_name())); |
434 | 0 | } |
435 | | // Concat string in array |
436 | 36 | _execute_array(input_rows_count, array_column, buffer, views, offsets_list, chars_list, |
437 | 36 | null_list, res_data, res_offset); |
438 | | |
439 | 415 | } else { |
440 | | // Concat string |
441 | 415 | _execute_string(input_rows_count, argument_size, buffer, views, offsets_list, |
442 | 415 | chars_list, null_list, res_data, res_offset); |
443 | 415 | } |
444 | 451 | if (is_null_type) { |
445 | 449 | block.get_by_position(result).column = |
446 | 449 | ColumnNullable::create(std::move(res), std::move(null_map)); |
447 | 449 | } else { |
448 | 2 | block.get_by_position(result).column = std::move(res); |
449 | 2 | } |
450 | 451 | return Status::OK(); |
451 | 451 | } |
452 | | |
453 | | private: |
454 | | void _execute_array(const size_t& input_rows_count, const ColumnArray& array_column, |
455 | | fmt::memory_buffer& buffer, std::vector<std::string_view>& views, |
456 | | const std::vector<const Offsets*>& offsets_list, |
457 | | const std::vector<const Chars*>& chars_list, |
458 | | const std::vector<const ColumnUInt8::Container*>& null_list, |
459 | 36 | Chars& res_data, Offsets& res_offset) const { |
460 | | // Get array nested column |
461 | 36 | const UInt8* array_nested_null_map = nullptr; |
462 | 36 | ColumnPtr array_nested_column = nullptr; |
463 | | |
464 | 36 | if (is_column_nullable(array_column.get_data())) { |
465 | 36 | const auto& array_nested_null_column = |
466 | 36 | reinterpret_cast<const ColumnNullable&>(array_column.get_data()); |
467 | | // String's null map in array |
468 | 36 | array_nested_null_map = |
469 | 36 | array_nested_null_column.get_null_map_column().get_data().data(); |
470 | 36 | array_nested_column = array_nested_null_column.get_nested_column_ptr(); |
471 | 36 | } else { |
472 | 0 | array_nested_column = array_column.get_data_ptr(); |
473 | 0 | } |
474 | | |
475 | 36 | const auto& string_column = reinterpret_cast<const ColumnString&>(*array_nested_column); |
476 | 36 | const Chars& string_src_chars = string_column.get_chars(); |
477 | 36 | const auto& src_string_offsets = string_column.get_offsets(); |
478 | 36 | const auto& src_array_offsets = array_column.get_offsets(); |
479 | 36 | size_t current_src_array_offset = 0; |
480 | 36 | auto& array_nullmap = *null_list[1]; |
481 | | |
482 | | // Concat string in array |
483 | 76 | for (size_t i = 0; i < input_rows_count; ++i) { |
484 | 40 | auto& sep_offsets = *offsets_list[0]; |
485 | 40 | auto& sep_chars = *chars_list[0]; |
486 | 40 | auto& sep_nullmap = *null_list[0]; |
487 | | |
488 | 40 | if (sep_nullmap[i]) { |
489 | 8 | res_offset[i] = res_data.size(); |
490 | 8 | current_src_array_offset += src_array_offsets[i] - src_array_offsets[i - 1]; |
491 | 8 | continue; |
492 | 8 | } |
493 | | |
494 | 32 | if (array_nullmap[i]) { |
495 | 0 | StringOP::push_empty_string(i, res_data, res_offset); |
496 | 0 | current_src_array_offset += src_array_offsets[i] - src_array_offsets[i - 1]; |
497 | 0 | continue; |
498 | 0 | } |
499 | | |
500 | 32 | int sep_size = sep_offsets[i] - sep_offsets[i - 1]; |
501 | 32 | const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]); |
502 | | |
503 | 32 | std::string_view sep(sep_data, sep_size); |
504 | 32 | buffer.clear(); |
505 | 32 | views.clear(); |
506 | | |
507 | 32 | for (auto next_src_array_offset = src_array_offsets[i]; |
508 | 128 | current_src_array_offset < next_src_array_offset; ++current_src_array_offset) { |
509 | 96 | const auto current_src_string_offset = |
510 | 96 | current_src_array_offset ? src_string_offsets[current_src_array_offset - 1] |
511 | 96 | : 0; |
512 | 96 | size_t bytes_to_copy = |
513 | 96 | src_string_offsets[current_src_array_offset] - current_src_string_offset; |
514 | 96 | const char* ptr = |
515 | 96 | reinterpret_cast<const char*>(&string_src_chars[current_src_string_offset]); |
516 | | |
517 | 96 | if (array_nested_null_map == nullptr || |
518 | 96 | !array_nested_null_map[current_src_array_offset]) { |
519 | 96 | views.emplace_back(ptr, bytes_to_copy); |
520 | 96 | } |
521 | 96 | } |
522 | | |
523 | 32 | fmt::format_to(buffer, "{}", fmt::join(views, sep)); |
524 | | |
525 | 32 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
526 | 32 | res_offset); |
527 | 32 | } |
528 | 36 | } |
529 | | |
530 | | void _execute_string(const size_t& input_rows_count, const size_t& argument_size, |
531 | | fmt::memory_buffer& buffer, std::vector<std::string_view>& views, |
532 | | const std::vector<const Offsets*>& offsets_list, |
533 | | const std::vector<const Chars*>& chars_list, |
534 | | const std::vector<const ColumnUInt8::Container*>& null_list, |
535 | 415 | Chars& res_data, Offsets& res_offset) const { |
536 | | // Concat string |
537 | 951 | for (size_t i = 0; i < input_rows_count; ++i) { |
538 | 536 | auto& sep_offsets = *offsets_list[0]; |
539 | 536 | auto& sep_chars = *chars_list[0]; |
540 | 536 | auto& sep_nullmap = *null_list[0]; |
541 | 536 | if (sep_nullmap[i]) { |
542 | 72 | res_offset[i] = res_data.size(); |
543 | 72 | continue; |
544 | 72 | } |
545 | | |
546 | 464 | int sep_size = sep_offsets[i] - sep_offsets[i - 1]; |
547 | 464 | const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]); |
548 | | |
549 | 464 | std::string_view sep(sep_data, sep_size); |
550 | 464 | buffer.clear(); |
551 | 464 | views.clear(); |
552 | 1.09k | for (size_t j = 1; j < argument_size; ++j) { |
553 | 632 | auto& current_offsets = *offsets_list[j]; |
554 | 632 | auto& current_chars = *chars_list[j]; |
555 | 632 | auto& current_nullmap = *null_list[j]; |
556 | 632 | int size = current_offsets[i] - current_offsets[i - 1]; |
557 | 632 | const char* ptr = |
558 | 632 | reinterpret_cast<const char*>(¤t_chars[current_offsets[i - 1]]); |
559 | 632 | if (!current_nullmap[i]) { |
560 | 580 | views.emplace_back(ptr, size); |
561 | 580 | } |
562 | 632 | } |
563 | 464 | fmt::format_to(buffer, "{}", fmt::join(views, sep)); |
564 | 464 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
565 | 464 | res_offset); |
566 | 464 | } |
567 | 415 | } |
568 | | }; |
569 | | |
570 | | class FunctionStringRepeat : public IFunction { |
571 | | public: |
572 | | static constexpr auto name = "repeat"; |
573 | 191 | static FunctionPtr create() { return std::make_shared<FunctionStringRepeat>(); } |
574 | 1 | String get_name() const override { return name; } |
575 | 183 | size_t get_number_of_arguments() const override { return 2; } |
576 | | // The nested data of NULL rows must be reset to the default value: |
577 | | // if it is left uninitialized and invalid, the repeat count may be huge and the result length overflows. |
578 | 163 | bool need_replace_null_data_to_default() const override { return true; } |
579 | | |
580 | 183 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
581 | 183 | return make_nullable(std::make_shared<DataTypeString>()); |
582 | 183 | } |
583 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
584 | 165 | uint32_t result, size_t input_rows_count) const override { |
585 | 165 | DCHECK_EQ(arguments.size(), 2); |
586 | 165 | auto res = ColumnString::create(); |
587 | 165 | auto null_map = ColumnUInt8::create(); |
588 | | |
589 | 165 | ColumnPtr argument_ptr[2]; |
590 | 165 | argument_ptr[0] = |
591 | 165 | block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); |
592 | 165 | argument_ptr[1] = block.get_by_position(arguments[1]).column; |
593 | | |
594 | 165 | if (const auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) { |
595 | 165 | if (const auto* col2 = check_and_get_column<ColumnInt32>(*argument_ptr[1])) { |
596 | 109 | RETURN_IF_ERROR(vector_vector(col1->get_chars(), col1->get_offsets(), |
597 | 109 | col2->get_data(), res->get_chars(), |
598 | 109 | res->get_offsets(), null_map->get_data())); |
599 | 109 | block.replace_by_position( |
600 | 109 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
601 | 109 | return Status::OK(); |
602 | 109 | } else if (const auto* col2_const = |
603 | 56 | check_and_get_column<ColumnConst>(*argument_ptr[1])) { |
604 | 56 | DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column())); |
605 | 56 | int repeat = col2_const->get_int(0); |
606 | 56 | if (repeat <= 0) { |
607 | 18 | null_map->get_data().resize_fill(input_rows_count, 0); |
608 | 18 | res->insert_many_defaults(input_rows_count); |
609 | 38 | } else { |
610 | 38 | vector_const(col1->get_chars(), col1->get_offsets(), repeat, res->get_chars(), |
611 | 38 | res->get_offsets(), null_map->get_data()); |
612 | 38 | } |
613 | 56 | block.replace_by_position( |
614 | 56 | result, ColumnNullable::create(std::move(res), std::move(null_map))); |
615 | 56 | return Status::OK(); |
616 | 56 | } |
617 | 165 | } |
618 | | |
619 | 0 | return Status::RuntimeError("repeat function get error param: {}, {}", |
620 | 0 | argument_ptr[0]->get_name(), argument_ptr[1]->get_name()); |
621 | 165 | } |
622 | | |
623 | | Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
624 | | const ColumnInt32::Container& repeats, ColumnString::Chars& res_data, |
625 | | ColumnString::Offsets& res_offsets, |
626 | 109 | ColumnUInt8::Container& null_map) const { |
627 | 109 | size_t input_row_size = offsets.size(); |
628 | | |
629 | 109 | fmt::memory_buffer buffer; |
630 | 109 | res_offsets.resize(input_row_size); |
631 | 109 | null_map.resize_fill(input_row_size, 0); |
632 | 277 | for (ssize_t i = 0; i < input_row_size; ++i) { |
633 | 168 | buffer.clear(); |
634 | 168 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
635 | 168 | size_t size = offsets[i] - offsets[i - 1]; |
636 | 168 | int repeat = repeats[i]; |
637 | 168 | if (repeat <= 0) { |
638 | 56 | StringOP::push_empty_string(i, res_data, res_offsets); |
639 | 112 | } else { |
640 | 112 | ColumnString::check_chars_length(repeat * size + res_data.size(), 0); |
641 | 644 | for (int j = 0; j < repeat; ++j) { |
642 | 532 | buffer.append(raw_str, raw_str + size); |
643 | 532 | } |
644 | 112 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, |
645 | 112 | res_data, res_offsets); |
646 | 112 | } |
647 | 168 | } |
648 | 109 | return Status::OK(); |
649 | 109 | } |
650 | | |
651 | | // TODO: 1. use pmr::vector<char> replace fmt_buffer may speed up the code |
652 | | // 2. abstract the `vector_vector` and `vector_const` |
653 | | // 3. rethink we should use `DEFAULT_MAX_STRING_SIZE` to bigger here |
654 | | void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets, |
655 | | int repeat, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets, |
656 | 38 | ColumnUInt8::Container& null_map) const { |
657 | 38 | size_t input_row_size = offsets.size(); |
658 | | |
659 | 38 | fmt::memory_buffer buffer; |
660 | 38 | res_offsets.resize(input_row_size); |
661 | 38 | null_map.resize_fill(input_row_size, 0); |
662 | 100 | for (ssize_t i = 0; i < input_row_size; ++i) { |
663 | 62 | buffer.clear(); |
664 | 62 | const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]); |
665 | 62 | size_t size = offsets[i] - offsets[i - 1]; |
666 | 62 | ColumnString::check_chars_length(repeat * size + res_data.size(), 0); |
667 | | |
668 | 285 | for (int j = 0; j < repeat; ++j) { |
669 | 223 | buffer.append(raw_str, raw_str + size); |
670 | 223 | } |
671 | 62 | StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data, |
672 | 62 | res_offsets); |
673 | 62 | } |
674 | 38 | } |
675 | | }; |
676 | | |
677 | | /// PaddingChars pre-processes the pad string for efficient padding. |
678 | | /// When is_utf8=false, character count equals byte count — no UTF-8 decoding needed. |
679 | | /// When is_utf8=true, we build a byte-offset table for code points. |
680 | | /// In both cases, the pad string is pre-expanded (doubled) until it has >= 16 characters, |
681 | | /// so that each memcpy in append_to copies at least 16 bytes at a time. |
682 | | template <bool is_utf8> |
683 | | struct PaddingChars { |
684 | | std::string pad_string; |
685 | | /// utf8_byte_offsets[i] = byte offset of i-th code point in pad_string. |
686 | | /// utf8_byte_offsets has (num_chars + 1) entries, with [0]=0 and [num_chars]=pad_string.size(). |
687 | | std::vector<size_t> utf8_byte_offsets; |
688 | | |
689 | | explicit PaddingChars(const uint8_t* data, size_t len) |
690 | 1.08k | : pad_string(reinterpret_cast<const char*>(data), len) { |
691 | 1.08k | init(); |
692 | 1.08k | } _ZN5doris12PaddingCharsILb0EEC2EPKhm Line | Count | Source | 690 | 51 | : pad_string(reinterpret_cast<const char*>(data), len) { | 691 | 51 | init(); | 692 | 51 | } |
_ZN5doris12PaddingCharsILb1EEC2EPKhm Line | Count | Source | 690 | 1.02k | : pad_string(reinterpret_cast<const char*>(data), len) { | 691 | 1.02k | init(); | 692 | 1.02k | } |
|
693 | | |
694 | 5.03k | size_t num_chars() const { |
695 | 5.03k | if constexpr (is_utf8) { |
696 | 4.84k | return utf8_byte_offsets.size() - 1; |
697 | 4.84k | } else { |
698 | 190 | return pad_string.size(); |
699 | 190 | } |
700 | 5.03k | } _ZNK5doris12PaddingCharsILb0EE9num_charsEv Line | Count | Source | 694 | 190 | size_t num_chars() const { | 695 | | if constexpr (is_utf8) { | 696 | | return utf8_byte_offsets.size() - 1; | 697 | 190 | } else { | 698 | 190 | return pad_string.size(); | 699 | 190 | } | 700 | 190 | } |
_ZNK5doris12PaddingCharsILb1EE9num_charsEv Line | Count | Source | 694 | 4.84k | size_t num_chars() const { | 695 | 4.84k | if constexpr (is_utf8) { | 696 | 4.84k | return utf8_byte_offsets.size() - 1; | 697 | | } else { | 698 | | return pad_string.size(); | 699 | | } | 700 | 4.84k | } |
|
701 | | |
702 | 19.6k | size_t chars_to_bytes(size_t n) const { |
703 | 19.6k | if constexpr (is_utf8) { |
704 | 19.5k | return utf8_byte_offsets[n]; |
705 | 19.5k | } else { |
706 | 38 | return n; |
707 | 38 | } |
708 | 19.6k | } _ZNK5doris12PaddingCharsILb0EE14chars_to_bytesEm Line | Count | Source | 702 | 38 | size_t chars_to_bytes(size_t n) const { | 703 | | if constexpr (is_utf8) { | 704 | | return utf8_byte_offsets[n]; | 705 | 38 | } else { | 706 | 38 | return n; | 707 | 38 | } | 708 | 38 | } |
_ZNK5doris12PaddingCharsILb1EE14chars_to_bytesEm Line | Count | Source | 702 | 19.5k | size_t chars_to_bytes(size_t n) const { | 703 | 19.5k | if constexpr (is_utf8) { | 704 | 19.5k | return utf8_byte_offsets[n]; | 705 | | } else { | 706 | | return n; | 707 | | } | 708 | 19.5k | } |
|
709 | | |
710 | | /// Append `num_chars_to_pad` padding characters to dst, return bytes written. |
711 | 354 | size_t append_to(uint8_t* dst, size_t num_chars_to_pad) const { |
712 | 354 | if (num_chars_to_pad == 0) { |
713 | 0 | return 0; |
714 | 0 | } |
715 | 354 | const auto* src = reinterpret_cast<const uint8_t*>(pad_string.data()); |
716 | 354 | const size_t step = num_chars(); |
717 | 354 | uint8_t* dst_start = dst; |
718 | 19.2k | while (num_chars_to_pad > step) { |
719 | 18.9k | size_t bytes = chars_to_bytes(step); |
720 | 18.9k | memcpy(dst, src, bytes); |
721 | 18.9k | dst += bytes; |
722 | 18.9k | num_chars_to_pad -= step; |
723 | 18.9k | } |
724 | 354 | size_t bytes = chars_to_bytes(num_chars_to_pad); |
725 | 354 | memcpy(dst, src, bytes); |
726 | 354 | dst += bytes; |
727 | 354 | return dst - dst_start; |
728 | 354 | } _ZNK5doris12PaddingCharsILb0EE9append_toEPhm Line | Count | Source | 711 | 19 | size_t append_to(uint8_t* dst, size_t num_chars_to_pad) const { | 712 | 19 | if (num_chars_to_pad == 0) { | 713 | 0 | return 0; | 714 | 0 | } | 715 | 19 | const auto* src = reinterpret_cast<const uint8_t*>(pad_string.data()); | 716 | 19 | const size_t step = num_chars(); | 717 | 19 | uint8_t* dst_start = dst; | 718 | 19 | while (num_chars_to_pad > step) { | 719 | 0 | size_t bytes = chars_to_bytes(step); | 720 | 0 | memcpy(dst, src, bytes); | 721 | 0 | dst += bytes; | 722 | 0 | num_chars_to_pad -= step; | 723 | 0 | } | 724 | 19 | size_t bytes = chars_to_bytes(num_chars_to_pad); | 725 | 19 | memcpy(dst, src, bytes); | 726 | 19 | dst += bytes; | 727 | 19 | return dst - dst_start; | 728 | 19 | } |
_ZNK5doris12PaddingCharsILb1EE9append_toEPhm Line | Count | Source | 711 | 335 | size_t append_to(uint8_t* dst, size_t num_chars_to_pad) const { | 712 | 335 | if (num_chars_to_pad == 0) { | 713 | 0 | return 0; | 714 | 0 | } | 715 | 335 | const auto* src = reinterpret_cast<const uint8_t*>(pad_string.data()); | 716 | 335 | const size_t step = num_chars(); | 717 | 335 | uint8_t* dst_start = dst; | 718 | 19.2k | while (num_chars_to_pad > step) { | 719 | 18.9k | size_t bytes = chars_to_bytes(step); | 720 | 18.9k | memcpy(dst, src, bytes); | 721 | 18.9k | dst += bytes; | 722 | 18.9k | num_chars_to_pad -= step; | 723 | 18.9k | } | 724 | 335 | size_t bytes = chars_to_bytes(num_chars_to_pad); | 725 | 335 | memcpy(dst, src, bytes); | 726 | 335 | dst += bytes; | 727 | 335 | return dst - dst_start; | 728 | 335 | } |
|
729 | | |
730 | | private: |
731 | 1.08k | void init() { |
732 | 1.08k | if (pad_string.empty()) { |
733 | 0 | return; |
734 | 0 | } |
735 | | |
736 | 1.08k | if constexpr (is_utf8) { |
737 | | // Build byte-offset table for each code point. |
738 | 1.02k | size_t offset = 0; |
739 | 1.02k | utf8_byte_offsets.reserve(pad_string.size() + 1); |
740 | 7.30k | while (offset < pad_string.size()) { |
741 | 6.27k | utf8_byte_offsets.push_back(offset); |
742 | 6.27k | offset += get_utf8_byte_length(static_cast<uint8_t>(pad_string[offset])); |
743 | 6.27k | offset = std::min(offset, pad_string.size()); |
744 | 6.27k | } |
745 | 1.02k | utf8_byte_offsets.push_back(pad_string.size()); |
746 | 1.02k | } |
747 | | |
748 | | // Pre-expand pad_string until it has >= 16 characters. |
749 | | // This ensures append_to() copies at least 16 bytes per iteration. |
750 | 3.58k | while (num_chars() < 16) { |
751 | 2.50k | if constexpr (is_utf8) { |
752 | 2.44k | size_t old_count = utf8_byte_offsets.size(); |
753 | 2.44k | size_t base = utf8_byte_offsets.back(); |
754 | 17.5k | for (size_t i = 1; i < old_count; ++i) { |
755 | 15.0k | utf8_byte_offsets.push_back(utf8_byte_offsets[i] + base); |
756 | 15.0k | } |
757 | 2.44k | } |
758 | 2.50k | pad_string += pad_string; |
759 | 2.50k | } |
760 | 1.08k | } _ZN5doris12PaddingCharsILb0EE4initEv Line | Count | Source | 731 | 51 | void init() { | 732 | 51 | if (pad_string.empty()) { | 733 | 0 | return; | 734 | 0 | } | 735 | | | 736 | | if constexpr (is_utf8) { | 737 | | // Build byte-offset table for each code point. | 738 | | size_t offset = 0; | 739 | | utf8_byte_offsets.reserve(pad_string.size() + 1); | 740 | | while (offset < pad_string.size()) { | 741 | | utf8_byte_offsets.push_back(offset); | 742 | | offset += get_utf8_byte_length(static_cast<uint8_t>(pad_string[offset])); | 743 | | offset = std::min(offset, pad_string.size()); | 744 | | } | 745 | | utf8_byte_offsets.push_back(pad_string.size()); | 746 | | } | 747 | | | 748 | | // Pre-expand pad_string until it has >= 16 characters. | 749 | | // This ensures append_to() copies at least 16 bytes per iteration. | 750 | 114 | while (num_chars() < 16) { | 751 | | if constexpr (is_utf8) { | 752 | | size_t old_count = utf8_byte_offsets.size(); | 753 | | size_t base = utf8_byte_offsets.back(); | 754 | | for (size_t i = 1; i < old_count; ++i) { | 755 | | utf8_byte_offsets.push_back(utf8_byte_offsets[i] + base); | 756 | | } | 757 | | } | 758 | 63 | pad_string += pad_string; | 759 | 63 | } | 760 | 51 | } |
_ZN5doris12PaddingCharsILb1EE4initEv Line | Count | Source | 731 | 1.02k | void init() { | 732 | 1.02k | if (pad_string.empty()) { | 733 | 0 | return; | 734 | 0 | } | 735 | | | 736 | 1.02k | if constexpr (is_utf8) { | 737 | | // Build byte-offset table for each code point. | 738 | 1.02k | size_t offset = 0; | 739 | 1.02k | utf8_byte_offsets.reserve(pad_string.size() + 1); | 740 | 7.30k | while (offset < pad_string.size()) { | 741 | 6.27k | utf8_byte_offsets.push_back(offset); | 742 | 6.27k | offset += get_utf8_byte_length(static_cast<uint8_t>(pad_string[offset])); | 743 | 6.27k | offset = std::min(offset, pad_string.size()); | 744 | 6.27k | } | 745 | 1.02k | utf8_byte_offsets.push_back(pad_string.size()); | 746 | 1.02k | } | 747 | | | 748 | | // Pre-expand pad_string until it has >= 16 characters. | 749 | | // This ensures append_to() copies at least 16 bytes per iteration. | 750 | 3.47k | while (num_chars() < 16) { | 751 | 2.44k | if constexpr (is_utf8) { | 752 | 2.44k | size_t old_count = utf8_byte_offsets.size(); | 753 | 2.44k | size_t base = utf8_byte_offsets.back(); | 754 | 17.5k | for (size_t i = 1; i < old_count; ++i) { | 755 | 15.0k | utf8_byte_offsets.push_back(utf8_byte_offsets[i] + base); | 756 | 15.0k | } | 757 | 2.44k | } | 758 | 2.44k | pad_string += pad_string; | 759 | 2.44k | } | 760 | 1.02k | } |
|
761 | | }; |
762 | | |
763 | | template <typename Impl> |
764 | | class FunctionStringPad : public IFunction { |
765 | | public: |
766 | | static constexpr auto name = Impl::name; |
767 | 1.68k | static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }_ZN5doris17FunctionStringPadINS_10StringLPadEE6createEv Line | Count | Source | 767 | 997 | static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE6createEv Line | Count | Source | 767 | 690 | static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); } |
|
768 | 2 | String get_name() const override { return name; }_ZNK5doris17FunctionStringPadINS_10StringLPadEE8get_nameB5cxx11Ev Line | Count | Source | 768 | 1 | String get_name() const override { return name; } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE8get_nameB5cxx11Ev Line | Count | Source | 768 | 1 | String get_name() const override { return name; } |
|
769 | 1.67k | size_t get_number_of_arguments() const override { return 3; }_ZNK5doris17FunctionStringPadINS_10StringLPadEE23get_number_of_argumentsEv Line | Count | Source | 769 | 989 | size_t get_number_of_arguments() const override { return 3; } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE23get_number_of_argumentsEv Line | Count | Source | 769 | 682 | size_t get_number_of_arguments() const override { return 3; } |
|
770 | | |
771 | 1.67k | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
772 | 1.67k | return make_nullable(std::make_shared<DataTypeString>()); |
773 | 1.67k | } _ZNK5doris17FunctionStringPadINS_10StringLPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 771 | 989 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 772 | 989 | return make_nullable(std::make_shared<DataTypeString>()); | 773 | 989 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE Line | Count | Source | 771 | 682 | DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { | 772 | 682 | return make_nullable(std::make_shared<DataTypeString>()); | 773 | 682 | } |
|
774 | | |
775 | | Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
776 | 1.17k | uint32_t result, size_t input_rows_count) const override { |
777 | 1.17k | DCHECK_GE(arguments.size(), 3); |
778 | 1.17k | auto null_map = ColumnUInt8::create(input_rows_count, 0); |
779 | 1.17k | auto res = ColumnString::create(); |
780 | | |
781 | 1.17k | ColumnPtr col[3]; |
782 | 1.17k | bool col_const[3]; |
783 | 4.69k | for (size_t i = 0; i < 3; ++i) { |
784 | 3.52k | std::tie(col[i], col_const[i]) = |
785 | 3.52k | unpack_if_const(block.get_by_position(arguments[i]).column); |
786 | 3.52k | } |
787 | 1.17k | auto& null_map_data = null_map->get_data(); |
788 | 1.17k | auto& res_offsets = res->get_offsets(); |
789 | 1.17k | auto& res_chars = res->get_chars(); |
790 | 1.17k | res_offsets.resize(input_rows_count); |
791 | | |
792 | 1.17k | const auto* strcol = assert_cast<const ColumnString*>(col[0].get()); |
793 | 1.17k | const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get()); |
794 | 1.17k | const auto& col_len_data = col_len->get_data(); |
795 | | |
796 | 1.17k | const auto* padcol = assert_cast<const ColumnString*>(col[2].get()); |
797 | | |
798 | 1.17k | if (col_const[1] && col_const[2]) { |
799 | 128 | auto pad = padcol->get_data_at(0); |
800 | 128 | const bool pad_all_ascii = |
801 | 128 | simd::VStringFunctions::is_ascii({pad.data, static_cast<size_t>(pad.size)}); |
802 | 128 | const bool all_ascii = pad_all_ascii && strcol->is_ascii(); |
803 | 128 | std::visit( |
804 | 128 | [&](auto str_const) { |
805 | 128 | if (all_ascii) { |
806 | 73 | execute_const_len_const_pad<true, str_const>( |
807 | 73 | *strcol, col_len_data, *padcol, res_offsets, res_chars, |
808 | 73 | null_map_data, input_rows_count); |
809 | 73 | } else { |
810 | 55 | execute_const_len_const_pad<false, str_const>( |
811 | 55 | *strcol, col_len_data, *padcol, res_offsets, res_chars, |
812 | 55 | null_map_data, input_rows_count); |
813 | 55 | } |
814 | 128 | }, _ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E_clISt17integral_constantIbLb0EEEEDaSC_ Line | Count | Source | 804 | 64 | [&](auto str_const) { | 805 | 64 | if (all_ascii) { | 806 | 37 | execute_const_len_const_pad<true, str_const>( | 807 | 37 | *strcol, col_len_data, *padcol, res_offsets, res_chars, | 808 | 37 | null_map_data, input_rows_count); | 809 | 37 | } else { | 810 | 27 | execute_const_len_const_pad<false, str_const>( | 811 | 27 | *strcol, col_len_data, *padcol, res_offsets, res_chars, | 812 | 27 | null_map_data, input_rows_count); | 813 | 27 | } | 814 | 64 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E_clISt17integral_constantIbLb1EEEEDaSC_ _ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E_clISt17integral_constantIbLb0EEEEDaSC_ Line | Count | Source | 804 | 64 | [&](auto str_const) { | 805 | 64 | if (all_ascii) { | 806 | 36 | execute_const_len_const_pad<true, str_const>( | 807 | 36 | *strcol, col_len_data, *padcol, res_offsets, res_chars, | 808 | 36 | null_map_data, input_rows_count); | 809 | 36 | } else { | 810 | 28 | execute_const_len_const_pad<false, str_const>( | 811 | 28 | *strcol, col_len_data, *padcol, res_offsets, res_chars, | 812 | 28 | null_map_data, input_rows_count); | 813 | 28 | } | 814 | 64 | }, |
Unexecuted instantiation: _ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E_clISt17integral_constantIbLb1EEEEDaSC_ |
815 | 128 | make_bool_variant(col_const[0])); |
816 | 1.04k | } else { |
817 | 1.04k | std::visit( |
818 | 1.04k | [&](auto str_const) { |
819 | 1.04k | execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol, |
820 | 1.04k | col_const[2], res_offsets, res_chars, |
821 | 1.04k | null_map_data, input_rows_count); |
822 | 1.04k | }, _ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E0_clISt17integral_constantIbLb0EEEEDaSC_ Line | Count | Source | 818 | 487 | [&](auto str_const) { | 819 | 487 | execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol, | 820 | 487 | col_const[2], res_offsets, res_chars, | 821 | 487 | null_map_data, input_rows_count); | 822 | 487 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E0_clISt17integral_constantIbLb1EEEEDaSC_ Line | Count | Source | 818 | 186 | [&](auto str_const) { | 819 | 186 | execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol, | 820 | 186 | col_const[2], res_offsets, res_chars, | 821 | 186 | null_map_data, input_rows_count); | 822 | 186 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E0_clISt17integral_constantIbLb0EEEEDaSC_ Line | Count | Source | 818 | 187 | [&](auto str_const) { | 819 | 187 | execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol, | 820 | 187 | col_const[2], res_offsets, res_chars, | 821 | 187 | null_map_data, input_rows_count); | 822 | 187 | }, |
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E0_clISt17integral_constantIbLb1EEEEDaSC_ Line | Count | Source | 818 | 186 | [&](auto str_const) { | 819 | 186 | execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol, | 820 | 186 | col_const[2], res_offsets, res_chars, | 821 | 186 | null_map_data, input_rows_count); | 822 | 186 | }, |
|
823 | 1.04k | make_bool_variant(col_const[0])); |
824 | 1.04k | } |
825 | | |
826 | 1.17k | block.get_by_position(result).column = |
827 | 1.17k | ColumnNullable::create(std::move(res), std::move(null_map)); |
828 | 1.17k | return Status::OK(); |
829 | 1.17k | } _ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 776 | 737 | uint32_t result, size_t input_rows_count) const override { | 777 | 737 | DCHECK_GE(arguments.size(), 3); | 778 | 737 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 779 | 737 | auto res = ColumnString::create(); | 780 | | | 781 | 737 | ColumnPtr col[3]; | 782 | 737 | bool col_const[3]; | 783 | 2.94k | for (size_t i = 0; i < 3; ++i) { | 784 | 2.21k | std::tie(col[i], col_const[i]) = | 785 | 2.21k | unpack_if_const(block.get_by_position(arguments[i]).column); | 786 | 2.21k | } | 787 | 737 | auto& null_map_data = null_map->get_data(); | 788 | 737 | auto& res_offsets = res->get_offsets(); | 789 | 737 | auto& res_chars = res->get_chars(); | 790 | 737 | res_offsets.resize(input_rows_count); | 791 | | | 792 | 737 | const auto* strcol = assert_cast<const ColumnString*>(col[0].get()); | 793 | 737 | const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get()); | 794 | 737 | const auto& col_len_data = col_len->get_data(); | 795 | | | 796 | 737 | const auto* padcol = assert_cast<const ColumnString*>(col[2].get()); | 797 | | | 798 | 737 | if (col_const[1] && col_const[2]) { | 799 | 64 | auto pad = padcol->get_data_at(0); | 800 | 64 | const bool pad_all_ascii = | 801 | 64 | simd::VStringFunctions::is_ascii({pad.data, static_cast<size_t>(pad.size)}); | 802 | 64 | const bool all_ascii = pad_all_ascii && strcol->is_ascii(); | 803 | 64 | std::visit( | 804 | 64 | [&](auto str_const) { | 805 | 64 | if (all_ascii) { | 806 | 64 | execute_const_len_const_pad<true, str_const>( | 807 | 64 | *strcol, col_len_data, *padcol, res_offsets, res_chars, | 808 | 64 | null_map_data, input_rows_count); | 809 | 64 | } else { | 810 | 64 | execute_const_len_const_pad<false, str_const>( | 811 | 64 | *strcol, col_len_data, *padcol, res_offsets, res_chars, | 812 | 64 | null_map_data, input_rows_count); | 813 | 64 | } | 
814 | 64 | }, | 815 | 64 | make_bool_variant(col_const[0])); | 816 | 673 | } else { | 817 | 673 | std::visit( | 818 | 673 | [&](auto str_const) { | 819 | 673 | execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol, | 820 | 673 | col_const[2], res_offsets, res_chars, | 821 | 673 | null_map_data, input_rows_count); | 822 | 673 | }, | 823 | 673 | make_bool_variant(col_const[0])); | 824 | 673 | } | 825 | | | 826 | 737 | block.get_by_position(result).column = | 827 | 737 | ColumnNullable::create(std::move(res), std::move(null_map)); | 828 | 737 | return Status::OK(); | 829 | 737 | } |
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm Line | Count | Source | 776 | 437 | uint32_t result, size_t input_rows_count) const override { | 777 | 437 | DCHECK_GE(arguments.size(), 3); | 778 | 437 | auto null_map = ColumnUInt8::create(input_rows_count, 0); | 779 | 437 | auto res = ColumnString::create(); | 780 | | | 781 | 437 | ColumnPtr col[3]; | 782 | 437 | bool col_const[3]; | 783 | 1.74k | for (size_t i = 0; i < 3; ++i) { | 784 | 1.31k | std::tie(col[i], col_const[i]) = | 785 | 1.31k | unpack_if_const(block.get_by_position(arguments[i]).column); | 786 | 1.31k | } | 787 | 437 | auto& null_map_data = null_map->get_data(); | 788 | 437 | auto& res_offsets = res->get_offsets(); | 789 | 437 | auto& res_chars = res->get_chars(); | 790 | 437 | res_offsets.resize(input_rows_count); | 791 | | | 792 | 437 | const auto* strcol = assert_cast<const ColumnString*>(col[0].get()); | 793 | 437 | const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get()); | 794 | 437 | const auto& col_len_data = col_len->get_data(); | 795 | | | 796 | 437 | const auto* padcol = assert_cast<const ColumnString*>(col[2].get()); | 797 | | | 798 | 437 | if (col_const[1] && col_const[2]) { | 799 | 64 | auto pad = padcol->get_data_at(0); | 800 | 64 | const bool pad_all_ascii = | 801 | 64 | simd::VStringFunctions::is_ascii({pad.data, static_cast<size_t>(pad.size)}); | 802 | 64 | const bool all_ascii = pad_all_ascii && strcol->is_ascii(); | 803 | 64 | std::visit( | 804 | 64 | [&](auto str_const) { | 805 | 64 | if (all_ascii) { | 806 | 64 | execute_const_len_const_pad<true, str_const>( | 807 | 64 | *strcol, col_len_data, *padcol, res_offsets, res_chars, | 808 | 64 | null_map_data, input_rows_count); | 809 | 64 | } else { | 810 | 64 | execute_const_len_const_pad<false, str_const>( | 811 | 64 | *strcol, col_len_data, *padcol, res_offsets, res_chars, | 812 | 64 | null_map_data, input_rows_count); | 813 | 64 | } | 814 | 64 | }, | 
815 | 64 | make_bool_variant(col_const[0])); | 816 | 373 | } else { | 817 | 373 | std::visit( | 818 | 373 | [&](auto str_const) { | 819 | 373 | execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol, | 820 | 373 | col_const[2], res_offsets, res_chars, | 821 | 373 | null_map_data, input_rows_count); | 822 | 373 | }, | 823 | 373 | make_bool_variant(col_const[0])); | 824 | 373 | } | 825 | | | 826 | 437 | block.get_by_position(result).column = | 827 | 437 | ColumnNullable::create(std::move(res), std::move(null_map)); | 828 | 437 | return Status::OK(); | 829 | 437 | } |
|
830 | | |
831 | | private: |
832 | | template <bool is_utf8> |
833 | 875 | static size_t get_char_length(const uint8_t* str_data, size_t str_byte_len) { |
834 | 875 | if constexpr (is_utf8) { |
835 | 782 | return simd::VStringFunctions::get_char_len(reinterpret_cast<const char*>(str_data), |
836 | 782 | str_byte_len); |
837 | 782 | } |
838 | 0 | return str_byte_len; |
839 | 875 | } _ZN5doris17FunctionStringPadINS_10StringLPadEE15get_char_lengthILb0EEEmPKhm Line | Count | Source | 833 | 47 | static size_t get_char_length(const uint8_t* str_data, size_t str_byte_len) { | 834 | | if constexpr (is_utf8) { | 835 | | return simd::VStringFunctions::get_char_len(reinterpret_cast<const char*>(str_data), | 836 | | str_byte_len); | 837 | | } | 838 | 47 | return str_byte_len; | 839 | 47 | } |
_ZN5doris17FunctionStringPadINS_10StringLPadEE15get_char_lengthILb1EEEmPKhm Line | Count | Source | 833 | 541 | static size_t get_char_length(const uint8_t* str_data, size_t str_byte_len) { | 834 | 541 | if constexpr (is_utf8) { | 835 | 541 | return simd::VStringFunctions::get_char_len(reinterpret_cast<const char*>(str_data), | 836 | 541 | str_byte_len); | 837 | 541 | } | 838 | 0 | return str_byte_len; | 839 | 541 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE15get_char_lengthILb0EEEmPKhm Line | Count | Source | 833 | 46 | static size_t get_char_length(const uint8_t* str_data, size_t str_byte_len) { | 834 | | if constexpr (is_utf8) { | 835 | | return simd::VStringFunctions::get_char_len(reinterpret_cast<const char*>(str_data), | 836 | | str_byte_len); | 837 | | } | 838 | 46 | return str_byte_len; | 839 | 46 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE15get_char_lengthILb1EEEmPKhm Line | Count | Source | 833 | 241 | static size_t get_char_length(const uint8_t* str_data, size_t str_byte_len) { | 834 | 241 | if constexpr (is_utf8) { | 835 | 241 | return simd::VStringFunctions::get_char_len(reinterpret_cast<const char*>(str_data), | 836 | 241 | str_byte_len); | 837 | 241 | } | 838 | 0 | return str_byte_len; | 839 | 241 | } |
|
840 | | |
841 | | template <bool is_utf8> |
842 | | static size_t get_truncated_byte_length(const uint8_t* str_data, size_t str_byte_len, |
843 | 515 | size_t str_char_len, size_t target_len) { |
844 | 515 | if constexpr (!is_utf8) { |
845 | 74 | return target_len; |
846 | 74 | } |
847 | 515 | if (str_char_len == target_len) { |
848 | 70 | return str_byte_len; |
849 | 70 | } |
850 | 445 | auto [byte_len, _] = simd::VStringFunctions::iterate_utf8_with_limit_length( |
851 | 445 | reinterpret_cast<const char*>(str_data), |
852 | 445 | reinterpret_cast<const char*>(str_data) + str_byte_len, target_len); |
853 | 445 | return byte_len; |
854 | 515 | } _ZN5doris17FunctionStringPadINS_10StringLPadEE25get_truncated_byte_lengthILb0EEEmPKhmmm Line | Count | Source | 843 | 37 | size_t str_char_len, size_t target_len) { | 844 | 37 | if constexpr (!is_utf8) { | 845 | 37 | return target_len; | 846 | 37 | } | 847 | 37 | if (str_char_len == target_len) { | 848 | 0 | return str_byte_len; | 849 | 0 | } | 850 | 37 | auto [byte_len, _] = simd::VStringFunctions::iterate_utf8_with_limit_length( | 851 | 37 | reinterpret_cast<const char*>(str_data), | 852 | 37 | reinterpret_cast<const char*>(str_data) + str_byte_len, target_len); | 853 | 37 | return byte_len; | 854 | 37 | } |
_ZN5doris17FunctionStringPadINS_10StringLPadEE25get_truncated_byte_lengthILb1EEEmPKhmmm Line | Count | Source | 843 | 221 | size_t str_char_len, size_t target_len) { | 844 | | if constexpr (!is_utf8) { | 845 | | return target_len; | 846 | | } | 847 | 221 | if (str_char_len == target_len) { | 848 | 35 | return str_byte_len; | 849 | 35 | } | 850 | 186 | auto [byte_len, _] = simd::VStringFunctions::iterate_utf8_with_limit_length( | 851 | 186 | reinterpret_cast<const char*>(str_data), | 852 | 186 | reinterpret_cast<const char*>(str_data) + str_byte_len, target_len); | 853 | 186 | return byte_len; | 854 | 221 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE25get_truncated_byte_lengthILb0EEEmPKhmmm Line | Count | Source | 843 | 37 | size_t str_char_len, size_t target_len) { | 844 | 37 | if constexpr (!is_utf8) { | 845 | 37 | return target_len; | 846 | 37 | } | 847 | 37 | if (str_char_len == target_len) { | 848 | 0 | return str_byte_len; | 849 | 0 | } | 850 | 37 | auto [byte_len, _] = simd::VStringFunctions::iterate_utf8_with_limit_length( | 851 | 37 | reinterpret_cast<const char*>(str_data), | 852 | 37 | reinterpret_cast<const char*>(str_data) + str_byte_len, target_len); | 853 | 37 | return byte_len; | 854 | 37 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE25get_truncated_byte_lengthILb1EEEmPKhmmm Line | Count | Source | 843 | 220 | size_t str_char_len, size_t target_len) { | 844 | | if constexpr (!is_utf8) { | 845 | | return target_len; | 846 | | } | 847 | 220 | if (str_char_len == target_len) { | 848 | 35 | return str_byte_len; | 849 | 35 | } | 850 | 185 | auto [byte_len, _] = simd::VStringFunctions::iterate_utf8_with_limit_length( | 851 | 185 | reinterpret_cast<const char*>(str_data), | 852 | 185 | reinterpret_cast<const char*>(str_data) + str_byte_len, target_len); | 853 | 185 | return byte_len; | 854 | 220 | } |
|
855 | | |
856 | 869 | static void ensure_capacity(ColumnString::Chars& res_chars, size_t needed, size_t row) { |
857 | 869 | if (needed <= res_chars.size()) { |
858 | 545 | return; |
859 | 545 | } |
860 | 324 | ColumnString::check_chars_length(needed, row); |
861 | 324 | res_chars.resize(std::max(needed, res_chars.size() * 3 / 2)); |
862 | 324 | } _ZN5doris17FunctionStringPadINS_10StringLPadEE15ensure_capacityERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEmm Line | Count | Source | 856 | 585 | static void ensure_capacity(ColumnString::Chars& res_chars, size_t needed, size_t row) { | 857 | 585 | if (needed <= res_chars.size()) { | 858 | 273 | return; | 859 | 273 | } | 860 | 312 | ColumnString::check_chars_length(needed, row); | 861 | 312 | res_chars.resize(std::max(needed, res_chars.size() * 3 / 2)); | 862 | 312 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE15ensure_capacityERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEmm Line | Count | Source | 856 | 284 | static void ensure_capacity(ColumnString::Chars& res_chars, size_t needed, size_t row) { | 857 | 284 | if (needed <= res_chars.size()) { | 858 | 272 | return; | 859 | 272 | } | 860 | 12 | ColumnString::check_chars_length(needed, row); | 861 | 12 | res_chars.resize(std::max(needed, res_chars.size() * 3 / 2)); | 862 | 12 | } |
|
863 | | |
864 | | template <bool is_utf8> |
865 | | static size_t estimate_const_output_bytes(const ColumnString::Chars& strcol_chars, |
866 | | int target_len, size_t input_rows_count, |
867 | 128 | const PaddingChars<is_utf8>* padding) { |
868 | 128 | if (target_len <= 0) { |
869 | 96 | return 0; |
870 | 96 | } |
871 | 32 | if constexpr (!is_utf8) { |
872 | 19 | return static_cast<size_t>(target_len) * input_rows_count; |
873 | 19 | } |
874 | 32 | if (padding != nullptr && padding->num_chars() > 0) { |
875 | 11 | size_t pad_bytes_per_char = |
876 | 11 | (padding->pad_string.size() + padding->num_chars() - 1) / padding->num_chars(); |
877 | 11 | return strcol_chars.size() + |
878 | 11 | static_cast<size_t>(target_len) * pad_bytes_per_char * input_rows_count; |
879 | 11 | } |
880 | 21 | return strcol_chars.size(); |
881 | 32 | } _ZN5doris17FunctionStringPadINS_10StringLPadEE27estimate_const_output_bytesILb0EEEmRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEimPKNS_12PaddingCharsIXT_EEE Line | Count | Source | 867 | 37 | const PaddingChars<is_utf8>* padding) { | 868 | 37 | if (target_len <= 0) { | 869 | 27 | return 0; | 870 | 27 | } | 871 | 10 | if constexpr (!is_utf8) { | 872 | 10 | return static_cast<size_t>(target_len) * input_rows_count; | 873 | 10 | } | 874 | 10 | if (padding != nullptr && padding->num_chars() > 0) { | 875 | 0 | size_t pad_bytes_per_char = | 876 | 0 | (padding->pad_string.size() + padding->num_chars() - 1) / padding->num_chars(); | 877 | 0 | return strcol_chars.size() + | 878 | 0 | static_cast<size_t>(target_len) * pad_bytes_per_char * input_rows_count; | 879 | 0 | } | 880 | 10 | return strcol_chars.size(); | 881 | 10 | } |
_ZN5doris17FunctionStringPadINS_10StringLPadEE27estimate_const_output_bytesILb1EEEmRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEimPKNS_12PaddingCharsIXT_EEE Line | Count | Source | 867 | 27 | const PaddingChars<is_utf8>* padding) { | 868 | 27 | if (target_len <= 0) { | 869 | 21 | return 0; | 870 | 21 | } | 871 | | if constexpr (!is_utf8) { | 872 | | return static_cast<size_t>(target_len) * input_rows_count; | 873 | | } | 874 | 6 | if (padding != nullptr && padding->num_chars() > 0) { | 875 | 5 | size_t pad_bytes_per_char = | 876 | 5 | (padding->pad_string.size() + padding->num_chars() - 1) / padding->num_chars(); | 877 | 5 | return strcol_chars.size() + | 878 | 5 | static_cast<size_t>(target_len) * pad_bytes_per_char * input_rows_count; | 879 | 5 | } | 880 | 1 | return strcol_chars.size(); | 881 | 6 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE27estimate_const_output_bytesILb0EEEmRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEimPKNS_12PaddingCharsIXT_EEE Line | Count | Source | 867 | 36 | const PaddingChars<is_utf8>* padding) { | 868 | 36 | if (target_len <= 0) { | 869 | 27 | return 0; | 870 | 27 | } | 871 | 9 | if constexpr (!is_utf8) { | 872 | 9 | return static_cast<size_t>(target_len) * input_rows_count; | 873 | 9 | } | 874 | 9 | if (padding != nullptr && padding->num_chars() > 0) { | 875 | 0 | size_t pad_bytes_per_char = | 876 | 0 | (padding->pad_string.size() + padding->num_chars() - 1) / padding->num_chars(); | 877 | 0 | return strcol_chars.size() + | 878 | 0 | static_cast<size_t>(target_len) * pad_bytes_per_char * input_rows_count; | 879 | 0 | } | 880 | 9 | return strcol_chars.size(); | 881 | 9 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE27estimate_const_output_bytesILb1EEEmRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEimPKNS_12PaddingCharsIXT_EEE Line | Count | Source | 867 | 28 | const PaddingChars<is_utf8>* padding) { | 868 | 28 | if (target_len <= 0) { | 869 | 21 | return 0; | 870 | 21 | } | 871 | | if constexpr (!is_utf8) { | 872 | | return static_cast<size_t>(target_len) * input_rows_count; | 873 | | } | 874 | 7 | if (padding != nullptr && padding->num_chars() > 0) { | 875 | 6 | size_t pad_bytes_per_char = | 876 | 6 | (padding->pad_string.size() + padding->num_chars() - 1) / padding->num_chars(); | 877 | 6 | return strcol_chars.size() + | 878 | 6 | static_cast<size_t>(target_len) * pad_bytes_per_char * input_rows_count; | 879 | 6 | } | 880 | 1 | return strcol_chars.size(); | 881 | 7 | } |
|
882 | | |
883 | | template <bool is_utf8> |
884 | | static void append_result_row(const uint8_t* str_data, size_t str_byte_len, int target_len, |
885 | | const PaddingChars<is_utf8>* padding, |
886 | | ColumnString::Chars& res_chars, |
887 | | ColumnString::Offsets& res_offsets, |
888 | | ColumnUInt8::Container& null_map_data, size_t row, |
889 | 1.42k | size_t& dst_offset) { |
890 | 1.42k | if (target_len < 0) { |
891 | 548 | null_map_data[row] = true; |
892 | 548 | res_offsets[row] = dst_offset; |
893 | 548 | return; |
894 | 548 | } |
895 | | |
896 | 875 | const size_t str_char_len = get_char_length<is_utf8>(str_data, str_byte_len); |
897 | 875 | const size_t target_char_len = static_cast<size_t>(target_len); |
898 | 875 | if (str_char_len >= target_char_len) { |
899 | 515 | const size_t truncated_byte_len = get_truncated_byte_length<is_utf8>( |
900 | 515 | str_data, str_byte_len, str_char_len, target_char_len); |
901 | 515 | const size_t needed = dst_offset + truncated_byte_len; |
902 | 515 | ensure_capacity(res_chars, needed, row); |
903 | 515 | memcpy(res_chars.data() + dst_offset, str_data, truncated_byte_len); |
904 | 515 | dst_offset += truncated_byte_len; |
905 | 515 | res_offsets[row] = dst_offset; |
906 | 515 | return; |
907 | 515 | } |
908 | | |
909 | 360 | if (padding == nullptr || padding->num_chars() == 0) { |
910 | 6 | res_offsets[row] = dst_offset; |
911 | 6 | return; |
912 | 6 | } |
913 | | |
914 | 354 | const size_t pad_char_count = target_char_len - str_char_len; |
915 | 354 | const size_t full_cycles = pad_char_count / padding->num_chars(); |
916 | 354 | const size_t remainder_chars = pad_char_count % padding->num_chars(); |
917 | 354 | const size_t pad_bytes = |
918 | 354 | full_cycles * padding->pad_string.size() + padding->chars_to_bytes(remainder_chars); |
919 | 354 | const size_t needed = dst_offset + str_byte_len + pad_bytes; |
920 | 354 | ensure_capacity(res_chars, needed, row); |
921 | | |
922 | 354 | if constexpr (Impl::is_lpad) { |
923 | 327 | dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count); |
924 | 327 | memcpy(res_chars.data() + dst_offset, str_data, str_byte_len); |
925 | 327 | dst_offset += str_byte_len; |
926 | 327 | } else { |
927 | 27 | memcpy(res_chars.data() + dst_offset, str_data, str_byte_len); |
928 | 27 | dst_offset += str_byte_len; |
929 | 27 | dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count); |
930 | 27 | } |
931 | 354 | res_offsets[row] = dst_offset; |
932 | 354 | } _ZN5doris17FunctionStringPadINS_10StringLPadEE17append_result_rowILb0EEEvPKhmiPKNS_12PaddingCharsIXT_EEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_mRm Line | Count | Source | 889 | 65 | size_t& dst_offset) { | 890 | 65 | if (target_len < 0) { | 891 | 18 | null_map_data[row] = true; | 892 | 18 | res_offsets[row] = dst_offset; | 893 | 18 | return; | 894 | 18 | } | 895 | | | 896 | 47 | const size_t str_char_len = get_char_length<is_utf8>(str_data, str_byte_len); | 897 | 47 | const size_t target_char_len = static_cast<size_t>(target_len); | 898 | 47 | if (str_char_len >= target_char_len) { | 899 | 37 | const size_t truncated_byte_len = get_truncated_byte_length<is_utf8>( | 900 | 37 | str_data, str_byte_len, str_char_len, target_char_len); | 901 | 37 | const size_t needed = dst_offset + truncated_byte_len; | 902 | 37 | ensure_capacity(res_chars, needed, row); | 903 | 37 | memcpy(res_chars.data() + dst_offset, str_data, truncated_byte_len); | 904 | 37 | dst_offset += truncated_byte_len; | 905 | 37 | res_offsets[row] = dst_offset; | 906 | 37 | return; | 907 | 37 | } | 908 | | | 909 | 10 | if (padding == nullptr || padding->num_chars() == 0) { | 910 | 0 | res_offsets[row] = dst_offset; | 911 | 0 | return; | 912 | 0 | } | 913 | | | 914 | 10 | const size_t pad_char_count = target_char_len - str_char_len; | 915 | 10 | const size_t full_cycles = pad_char_count / padding->num_chars(); | 916 | 10 | const size_t remainder_chars = pad_char_count % padding->num_chars(); | 917 | 10 | const size_t pad_bytes = | 918 | 10 | full_cycles * padding->pad_string.size() + padding->chars_to_bytes(remainder_chars); | 919 | 10 | const size_t needed = dst_offset + str_byte_len + pad_bytes; | 920 | 10 | ensure_capacity(res_chars, needed, row); | 921 | | | 922 | 10 | if constexpr (Impl::is_lpad) { | 923 | 10 | dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count); | 924 | 10 | 
memcpy(res_chars.data() + dst_offset, str_data, str_byte_len); | 925 | 10 | dst_offset += str_byte_len; | 926 | | } else { | 927 | | memcpy(res_chars.data() + dst_offset, str_data, str_byte_len); | 928 | | dst_offset += str_byte_len; | 929 | | dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count); | 930 | | } | 931 | 10 | res_offsets[row] = dst_offset; | 932 | 10 | } |
_ZN5doris17FunctionStringPadINS_10StringLPadEE17append_result_rowILb1EEEvPKhmiPKNS_12PaddingCharsIXT_EEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_mRm Line | Count | Source | 889 | 797 | size_t& dst_offset) { | 890 | 797 | if (target_len < 0) { | 891 | 256 | null_map_data[row] = true; | 892 | 256 | res_offsets[row] = dst_offset; | 893 | 256 | return; | 894 | 256 | } | 895 | | | 896 | 541 | const size_t str_char_len = get_char_length<is_utf8>(str_data, str_byte_len); | 897 | 541 | const size_t target_char_len = static_cast<size_t>(target_len); | 898 | 541 | if (str_char_len >= target_char_len) { | 899 | 221 | const size_t truncated_byte_len = get_truncated_byte_length<is_utf8>( | 900 | 221 | str_data, str_byte_len, str_char_len, target_char_len); | 901 | 221 | const size_t needed = dst_offset + truncated_byte_len; | 902 | 221 | ensure_capacity(res_chars, needed, row); | 903 | 221 | memcpy(res_chars.data() + dst_offset, str_data, truncated_byte_len); | 904 | 221 | dst_offset += truncated_byte_len; | 905 | 221 | res_offsets[row] = dst_offset; | 906 | 221 | return; | 907 | 221 | } | 908 | | | 909 | 320 | if (padding == nullptr || padding->num_chars() == 0) { | 910 | 3 | res_offsets[row] = dst_offset; | 911 | 3 | return; | 912 | 3 | } | 913 | | | 914 | 317 | const size_t pad_char_count = target_char_len - str_char_len; | 915 | 317 | const size_t full_cycles = pad_char_count / padding->num_chars(); | 916 | 317 | const size_t remainder_chars = pad_char_count % padding->num_chars(); | 917 | 317 | const size_t pad_bytes = | 918 | 317 | full_cycles * padding->pad_string.size() + padding->chars_to_bytes(remainder_chars); | 919 | 317 | const size_t needed = dst_offset + str_byte_len + pad_bytes; | 920 | 317 | ensure_capacity(res_chars, needed, row); | 921 | | | 922 | 317 | if constexpr (Impl::is_lpad) { | 923 | 317 | dst_offset += padding->append_to(res_chars.data() + dst_offset, 
pad_char_count); | 924 | 317 | memcpy(res_chars.data() + dst_offset, str_data, str_byte_len); | 925 | 317 | dst_offset += str_byte_len; | 926 | | } else { | 927 | | memcpy(res_chars.data() + dst_offset, str_data, str_byte_len); | 928 | | dst_offset += str_byte_len; | 929 | | dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count); | 930 | | } | 931 | 317 | res_offsets[row] = dst_offset; | 932 | 317 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE17append_result_rowILb0EEEvPKhmiPKNS_12PaddingCharsIXT_EEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_mRm Line | Count | Source | 889 | 64 | size_t& dst_offset) { | 890 | 64 | if (target_len < 0) { | 891 | 18 | null_map_data[row] = true; | 892 | 18 | res_offsets[row] = dst_offset; | 893 | 18 | return; | 894 | 18 | } | 895 | | | 896 | 46 | const size_t str_char_len = get_char_length<is_utf8>(str_data, str_byte_len); | 897 | 46 | const size_t target_char_len = static_cast<size_t>(target_len); | 898 | 46 | if (str_char_len >= target_char_len) { | 899 | 37 | const size_t truncated_byte_len = get_truncated_byte_length<is_utf8>( | 900 | 37 | str_data, str_byte_len, str_char_len, target_char_len); | 901 | 37 | const size_t needed = dst_offset + truncated_byte_len; | 902 | 37 | ensure_capacity(res_chars, needed, row); | 903 | 37 | memcpy(res_chars.data() + dst_offset, str_data, truncated_byte_len); | 904 | 37 | dst_offset += truncated_byte_len; | 905 | 37 | res_offsets[row] = dst_offset; | 906 | 37 | return; | 907 | 37 | } | 908 | | | 909 | 9 | if (padding == nullptr || padding->num_chars() == 0) { | 910 | 0 | res_offsets[row] = dst_offset; | 911 | 0 | return; | 912 | 0 | } | 913 | | | 914 | 9 | const size_t pad_char_count = target_char_len - str_char_len; | 915 | 9 | const size_t full_cycles = pad_char_count / padding->num_chars(); | 916 | 9 | const size_t remainder_chars = pad_char_count % padding->num_chars(); | 917 | 9 | const size_t pad_bytes = | 918 | 9 | full_cycles * padding->pad_string.size() + padding->chars_to_bytes(remainder_chars); | 919 | 9 | const size_t needed = dst_offset + str_byte_len + pad_bytes; | 920 | 9 | ensure_capacity(res_chars, needed, row); | 921 | | | 922 | | if constexpr (Impl::is_lpad) { | 923 | | dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count); | 924 | | memcpy(res_chars.data() + 
dst_offset, str_data, str_byte_len); | 925 | | dst_offset += str_byte_len; | 926 | 9 | } else { | 927 | 9 | memcpy(res_chars.data() + dst_offset, str_data, str_byte_len); | 928 | 9 | dst_offset += str_byte_len; | 929 | 9 | dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count); | 930 | 9 | } | 931 | 9 | res_offsets[row] = dst_offset; | 932 | 9 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE17append_result_rowILb1EEEvPKhmiPKNS_12PaddingCharsIXT_EEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_mRm Line | Count | Source | 889 | 497 | size_t& dst_offset) { | 890 | 497 | if (target_len < 0) { | 891 | 256 | null_map_data[row] = true; | 892 | 256 | res_offsets[row] = dst_offset; | 893 | 256 | return; | 894 | 256 | } | 895 | | | 896 | 241 | const size_t str_char_len = get_char_length<is_utf8>(str_data, str_byte_len); | 897 | 241 | const size_t target_char_len = static_cast<size_t>(target_len); | 898 | 241 | if (str_char_len >= target_char_len) { | 899 | 220 | const size_t truncated_byte_len = get_truncated_byte_length<is_utf8>( | 900 | 220 | str_data, str_byte_len, str_char_len, target_char_len); | 901 | 220 | const size_t needed = dst_offset + truncated_byte_len; | 902 | 220 | ensure_capacity(res_chars, needed, row); | 903 | 220 | memcpy(res_chars.data() + dst_offset, str_data, truncated_byte_len); | 904 | 220 | dst_offset += truncated_byte_len; | 905 | 220 | res_offsets[row] = dst_offset; | 906 | 220 | return; | 907 | 220 | } | 908 | | | 909 | 21 | if (padding == nullptr || padding->num_chars() == 0) { | 910 | 3 | res_offsets[row] = dst_offset; | 911 | 3 | return; | 912 | 3 | } | 913 | | | 914 | 18 | const size_t pad_char_count = target_char_len - str_char_len; | 915 | 18 | const size_t full_cycles = pad_char_count / padding->num_chars(); | 916 | 18 | const size_t remainder_chars = pad_char_count % padding->num_chars(); | 917 | 18 | const size_t pad_bytes = | 918 | 18 | full_cycles * padding->pad_string.size() + padding->chars_to_bytes(remainder_chars); | 919 | 18 | const size_t needed = dst_offset + str_byte_len + pad_bytes; | 920 | 18 | ensure_capacity(res_chars, needed, row); | 921 | | | 922 | | if constexpr (Impl::is_lpad) { | 923 | | dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count); | 924 | | 
memcpy(res_chars.data() + dst_offset, str_data, str_byte_len); | 925 | | dst_offset += str_byte_len; | 926 | 18 | } else { | 927 | 18 | memcpy(res_chars.data() + dst_offset, str_data, str_byte_len); | 928 | 18 | dst_offset += str_byte_len; | 929 | 18 | dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count); | 930 | 18 | } | 931 | 18 | res_offsets[row] = dst_offset; | 932 | 18 | } |
|
933 | | |
934 | | template <bool all_ascii, bool str_const> |
935 | | static void execute_const_len_const_pad(const ColumnString& strcol, |
936 | | const ColumnInt32::Container& col_len_data, |
937 | | const ColumnString& padcol, |
938 | | ColumnString::Offsets& res_offsets, |
939 | | ColumnString::Chars& res_chars, |
940 | | ColumnUInt8::Container& null_map_data, |
941 | 128 | size_t input_rows_count) { |
942 | 128 | constexpr bool is_utf8 = !all_ascii; |
943 | 128 | using PadChars = PaddingChars<is_utf8>; |
944 | | |
945 | 128 | const int target_len = col_len_data[0]; |
946 | 128 | std::optional<PadChars> padding; |
947 | 128 | const auto pad = padcol.get_data_at(0); |
948 | 128 | if (!pad.empty()) { |
949 | 98 | padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); |
950 | 98 | } |
951 | | |
952 | 128 | const PadChars* padding_ptr = padding ? &*padding : nullptr; |
953 | 128 | const size_t estimated_total = estimate_const_output_bytes<is_utf8>( |
954 | 128 | strcol.get_chars(), target_len, input_rows_count, padding_ptr); |
955 | 128 | if (estimated_total > 0) { |
956 | 32 | ColumnString::check_chars_length(estimated_total, 0, input_rows_count); |
957 | 32 | } |
958 | 128 | res_chars.resize(estimated_total); |
959 | | |
960 | 128 | size_t dst_offset = 0; |
961 | 312 | for (size_t i = 0; i < input_rows_count; ++i) { |
962 | 184 | auto str = strcol.get_data_at(index_check_const<str_const>(i)); |
963 | 184 | append_result_row<is_utf8>(reinterpret_cast<const uint8_t*>(str.data), str.size, |
964 | 184 | target_len, padding_ptr, res_chars, res_offsets, |
965 | 184 | null_map_data, i, dst_offset); |
966 | 184 | } |
967 | 128 | res_chars.resize(dst_offset); |
968 | 128 | } _ZN5doris17FunctionStringPadINS_10StringLPadEE27execute_const_len_const_padILb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m Line | Count | Source | 941 | 37 | size_t input_rows_count) { | 942 | 37 | constexpr bool is_utf8 = !all_ascii; | 943 | 37 | using PadChars = PaddingChars<is_utf8>; | 944 | | | 945 | 37 | const int target_len = col_len_data[0]; | 946 | 37 | std::optional<PadChars> padding; | 947 | 37 | const auto pad = padcol.get_data_at(0); | 948 | 37 | if (!pad.empty()) { | 949 | 26 | padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 950 | 26 | } | 951 | | | 952 | 37 | const PadChars* padding_ptr = padding ? &*padding : nullptr; | 953 | 37 | const size_t estimated_total = estimate_const_output_bytes<is_utf8>( | 954 | 37 | strcol.get_chars(), target_len, input_rows_count, padding_ptr); | 955 | 37 | if (estimated_total > 0) { | 956 | 10 | ColumnString::check_chars_length(estimated_total, 0, input_rows_count); | 957 | 10 | } | 958 | 37 | res_chars.resize(estimated_total); | 959 | | | 960 | 37 | size_t dst_offset = 0; | 961 | 102 | for (size_t i = 0; i < input_rows_count; ++i) { | 962 | 65 | auto str = strcol.get_data_at(index_check_const<str_const>(i)); | 963 | 65 | append_result_row<is_utf8>(reinterpret_cast<const uint8_t*>(str.data), str.size, | 964 | 65 | target_len, padding_ptr, res_chars, res_offsets, | 965 | 65 | null_map_data, i, dst_offset); | 966 | 65 | } | 967 | 37 | res_chars.resize(dst_offset); | 968 | 37 | } |
_ZN5doris17FunctionStringPadINS_10StringLPadEE27execute_const_len_const_padILb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m Line | Count | Source | 941 | 27 | size_t input_rows_count) { | 942 | 27 | constexpr bool is_utf8 = !all_ascii; | 943 | 27 | using PadChars = PaddingChars<is_utf8>; | 944 | | | 945 | 27 | const int target_len = col_len_data[0]; | 946 | 27 | std::optional<PadChars> padding; | 947 | 27 | const auto pad = padcol.get_data_at(0); | 948 | 27 | if (!pad.empty()) { | 949 | 23 | padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 950 | 23 | } | 951 | | | 952 | 27 | const PadChars* padding_ptr = padding ? &*padding : nullptr; | 953 | 27 | const size_t estimated_total = estimate_const_output_bytes<is_utf8>( | 954 | 27 | strcol.get_chars(), target_len, input_rows_count, padding_ptr); | 955 | 27 | if (estimated_total > 0) { | 956 | 6 | ColumnString::check_chars_length(estimated_total, 0, input_rows_count); | 957 | 6 | } | 958 | 27 | res_chars.resize(estimated_total); | 959 | | | 960 | 27 | size_t dst_offset = 0; | 961 | 54 | for (size_t i = 0; i < input_rows_count; ++i) { | 962 | 27 | auto str = strcol.get_data_at(index_check_const<str_const>(i)); | 963 | 27 | append_result_row<is_utf8>(reinterpret_cast<const uint8_t*>(str.data), str.size, | 964 | 27 | target_len, padding_ptr, res_chars, res_offsets, | 965 | 27 | null_map_data, i, dst_offset); | 966 | 27 | } | 967 | 27 | res_chars.resize(dst_offset); | 968 | 27 | } |
Unexecuted instantiation: _ZN5doris17FunctionStringPadINS_10StringLPadEE27execute_const_len_const_padILb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m Unexecuted instantiation: _ZN5doris17FunctionStringPadINS_10StringLPadEE27execute_const_len_const_padILb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m _ZN5doris17FunctionStringPadINS_10StringRPadEE27execute_const_len_const_padILb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m Line | Count | Source | 941 | 36 | size_t input_rows_count) { | 942 | 36 | constexpr bool is_utf8 = !all_ascii; | 943 | 36 | using PadChars = PaddingChars<is_utf8>; | 944 | | | 945 | 36 | const int target_len = col_len_data[0]; | 946 | 36 | std::optional<PadChars> padding; | 947 | 36 | const auto pad = padcol.get_data_at(0); | 948 | 36 | if (!pad.empty()) { | 949 | 25 | padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 950 | 25 | } | 951 | | | 952 | 36 | const PadChars* padding_ptr = padding ? 
&*padding : nullptr; | 953 | 36 | const size_t estimated_total = estimate_const_output_bytes<is_utf8>( | 954 | 36 | strcol.get_chars(), target_len, input_rows_count, padding_ptr); | 955 | 36 | if (estimated_total > 0) { | 956 | 9 | ColumnString::check_chars_length(estimated_total, 0, input_rows_count); | 957 | 9 | } | 958 | 36 | res_chars.resize(estimated_total); | 959 | | | 960 | 36 | size_t dst_offset = 0; | 961 | 100 | for (size_t i = 0; i < input_rows_count; ++i) { | 962 | 64 | auto str = strcol.get_data_at(index_check_const<str_const>(i)); | 963 | 64 | append_result_row<is_utf8>(reinterpret_cast<const uint8_t*>(str.data), str.size, | 964 | 64 | target_len, padding_ptr, res_chars, res_offsets, | 965 | 64 | null_map_data, i, dst_offset); | 966 | 64 | } | 967 | 36 | res_chars.resize(dst_offset); | 968 | 36 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE27execute_const_len_const_padILb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m Line | Count | Source | 941 | 28 | size_t input_rows_count) { | 942 | 28 | constexpr bool is_utf8 = !all_ascii; | 943 | 28 | using PadChars = PaddingChars<is_utf8>; | 944 | | | 945 | 28 | const int target_len = col_len_data[0]; | 946 | 28 | std::optional<PadChars> padding; | 947 | 28 | const auto pad = padcol.get_data_at(0); | 948 | 28 | if (!pad.empty()) { | 949 | 24 | padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 950 | 24 | } | 951 | | | 952 | 28 | const PadChars* padding_ptr = padding ? &*padding : nullptr; | 953 | 28 | const size_t estimated_total = estimate_const_output_bytes<is_utf8>( | 954 | 28 | strcol.get_chars(), target_len, input_rows_count, padding_ptr); | 955 | 28 | if (estimated_total > 0) { | 956 | 7 | ColumnString::check_chars_length(estimated_total, 0, input_rows_count); | 957 | 7 | } | 958 | 28 | res_chars.resize(estimated_total); | 959 | | | 960 | 28 | size_t dst_offset = 0; | 961 | 56 | for (size_t i = 0; i < input_rows_count; ++i) { | 962 | 28 | auto str = strcol.get_data_at(index_check_const<str_const>(i)); | 963 | 28 | append_result_row<is_utf8>(reinterpret_cast<const uint8_t*>(str.data), str.size, | 964 | 28 | target_len, padding_ptr, res_chars, res_offsets, | 965 | 28 | null_map_data, i, dst_offset); | 966 | 28 | } | 967 | 28 | res_chars.resize(dst_offset); | 968 | 28 | } |
Unexecuted instantiation: _ZN5doris17FunctionStringPadINS_10StringRPadEE27execute_const_len_const_padILb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m Unexecuted instantiation: _ZN5doris17FunctionStringPadINS_10StringRPadEE27execute_const_len_const_padILb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m |
969 | | |
970 | | template <bool str_const> |
971 | | static void execute_general(const ColumnString& strcol, |
972 | | const ColumnInt32::Container& col_len_data, bool len_const, |
973 | | const ColumnString& padcol, bool pad_const, |
974 | | ColumnString::Offsets& res_offsets, ColumnString::Chars& res_chars, |
975 | 1.04k | ColumnUInt8::Container& null_map_data, size_t input_rows_count) { |
976 | 1.04k | using PadChars = PaddingChars<true>; |
977 | 1.04k | std::optional<PadChars> const_padding; |
978 | 1.04k | const PadChars* const_padding_ptr = nullptr; |
979 | 1.04k | if (pad_const) { |
980 | 248 | auto pad = padcol.get_data_at(0); |
981 | 248 | if (!pad.empty()) { |
982 | 188 | const_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); |
983 | 188 | const_padding_ptr = &*const_padding; |
984 | 188 | } |
985 | 248 | } |
986 | | |
987 | 1.04k | res_chars.resize(strcol.get_chars().size()); |
988 | 1.04k | size_t dst_offset = 0; |
989 | 2.28k | for (size_t i = 0; i < input_rows_count; ++i) { |
990 | 1.23k | auto str = strcol.get_data_at(index_check_const<str_const>(i)); |
991 | 1.23k | const int target_len = col_len_data[len_const ? 0 : i]; |
992 | | |
993 | 1.23k | const PadChars* padding_ptr = const_padding_ptr; |
994 | 1.23k | std::optional<PadChars> row_padding; |
995 | 1.23k | if (!pad_const) { |
996 | 991 | auto pad = padcol.get_data_at(i); |
997 | 991 | if (!pad.empty()) { |
998 | 794 | row_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); |
999 | 794 | padding_ptr = &*row_padding; |
1000 | 794 | } else { |
1001 | 197 | padding_ptr = nullptr; |
1002 | 197 | } |
1003 | 991 | } |
1004 | | |
1005 | 1.23k | append_result_row<true>(reinterpret_cast<const uint8_t*>(str.data), str.size, |
1006 | 1.23k | target_len, padding_ptr, res_chars, res_offsets, null_map_data, |
1007 | 1.23k | i, dst_offset); |
1008 | 1.23k | } |
1009 | 1.04k | res_chars.resize(dst_offset); |
1010 | 1.04k | } _ZN5doris17FunctionStringPadINS_10StringLPadEE15execute_generalILb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEbS7_bRNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m Line | Count | Source | 975 | 487 | ColumnUInt8::Container& null_map_data, size_t input_rows_count) { | 976 | 487 | using PadChars = PaddingChars<true>; | 977 | 487 | std::optional<PadChars> const_padding; | 978 | 487 | const PadChars* const_padding_ptr = nullptr; | 979 | 487 | if (pad_const) { | 980 | 62 | auto pad = padcol.get_data_at(0); | 981 | 62 | if (!pad.empty()) { | 982 | 47 | const_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 983 | 47 | const_padding_ptr = &*const_padding; | 984 | 47 | } | 985 | 62 | } | 986 | | | 987 | 487 | res_chars.resize(strcol.get_chars().size()); | 988 | 487 | size_t dst_offset = 0; | 989 | 1.07k | for (size_t i = 0; i < input_rows_count; ++i) { | 990 | 584 | auto str = strcol.get_data_at(index_check_const<str_const>(i)); | 991 | 584 | const int target_len = col_len_data[len_const ? 0 : i]; | 992 | | | 993 | 584 | const PadChars* padding_ptr = const_padding_ptr; | 994 | 584 | std::optional<PadChars> row_padding; | 995 | 584 | if (!pad_const) { | 996 | 522 | auto pad = padcol.get_data_at(i); | 997 | 522 | if (!pad.empty()) { | 998 | 453 | row_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 999 | 453 | padding_ptr = &*row_padding; | 1000 | 453 | } else { | 1001 | 69 | padding_ptr = nullptr; | 1002 | 69 | } | 1003 | 522 | } | 1004 | | | 1005 | 584 | append_result_row<true>(reinterpret_cast<const uint8_t*>(str.data), str.size, | 1006 | 584 | target_len, padding_ptr, res_chars, res_offsets, null_map_data, | 1007 | 584 | i, dst_offset); | 1008 | 584 | } | 1009 | 487 | res_chars.resize(dst_offset); | 1010 | 487 | } |
_ZN5doris17FunctionStringPadINS_10StringLPadEE15execute_generalILb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEbS7_bRNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m Line | Count | Source | 975 | 186 | ColumnUInt8::Container& null_map_data, size_t input_rows_count) { | 976 | 186 | using PadChars = PaddingChars<true>; | 977 | 186 | std::optional<PadChars> const_padding; | 978 | 186 | const PadChars* const_padding_ptr = nullptr; | 979 | 186 | if (pad_const) { | 980 | 62 | auto pad = padcol.get_data_at(0); | 981 | 62 | if (!pad.empty()) { | 982 | 47 | const_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 983 | 47 | const_padding_ptr = &*const_padding; | 984 | 47 | } | 985 | 62 | } | 986 | | | 987 | 186 | res_chars.resize(strcol.get_chars().size()); | 988 | 186 | size_t dst_offset = 0; | 989 | 372 | for (size_t i = 0; i < input_rows_count; ++i) { | 990 | 186 | auto str = strcol.get_data_at(index_check_const<str_const>(i)); | 991 | 186 | const int target_len = col_len_data[len_const ? 0 : i]; | 992 | | | 993 | 186 | const PadChars* padding_ptr = const_padding_ptr; | 994 | 186 | std::optional<PadChars> row_padding; | 995 | 186 | if (!pad_const) { | 996 | 124 | auto pad = padcol.get_data_at(i); | 997 | 124 | if (!pad.empty()) { | 998 | 94 | row_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 999 | 94 | padding_ptr = &*row_padding; | 1000 | 94 | } else { | 1001 | 30 | padding_ptr = nullptr; | 1002 | 30 | } | 1003 | 124 | } | 1004 | | | 1005 | 186 | append_result_row<true>(reinterpret_cast<const uint8_t*>(str.data), str.size, | 1006 | 186 | target_len, padding_ptr, res_chars, res_offsets, null_map_data, | 1007 | 186 | i, dst_offset); | 1008 | 186 | } | 1009 | 186 | res_chars.resize(dst_offset); | 1010 | 186 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE15execute_generalILb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEbS7_bRNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m Line | Count | Source | 975 | 187 | ColumnUInt8::Container& null_map_data, size_t input_rows_count) { | 976 | 187 | using PadChars = PaddingChars<true>; | 977 | 187 | std::optional<PadChars> const_padding; | 978 | 187 | const PadChars* const_padding_ptr = nullptr; | 979 | 187 | if (pad_const) { | 980 | 62 | auto pad = padcol.get_data_at(0); | 981 | 62 | if (!pad.empty()) { | 982 | 47 | const_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 983 | 47 | const_padding_ptr = &*const_padding; | 984 | 47 | } | 985 | 62 | } | 986 | | | 987 | 187 | res_chars.resize(strcol.get_chars().size()); | 988 | 187 | size_t dst_offset = 0; | 989 | 470 | for (size_t i = 0; i < input_rows_count; ++i) { | 990 | 283 | auto str = strcol.get_data_at(index_check_const<str_const>(i)); | 991 | 283 | const int target_len = col_len_data[len_const ? 0 : i]; | 992 | | | 993 | 283 | const PadChars* padding_ptr = const_padding_ptr; | 994 | 283 | std::optional<PadChars> row_padding; | 995 | 283 | if (!pad_const) { | 996 | 221 | auto pad = padcol.get_data_at(i); | 997 | 221 | if (!pad.empty()) { | 998 | 153 | row_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 999 | 153 | padding_ptr = &*row_padding; | 1000 | 153 | } else { | 1001 | 68 | padding_ptr = nullptr; | 1002 | 68 | } | 1003 | 221 | } | 1004 | | | 1005 | 283 | append_result_row<true>(reinterpret_cast<const uint8_t*>(str.data), str.size, | 1006 | 283 | target_len, padding_ptr, res_chars, res_offsets, null_map_data, | 1007 | 283 | i, dst_offset); | 1008 | 283 | } | 1009 | 187 | res_chars.resize(dst_offset); | 1010 | 187 | } |
_ZN5doris17FunctionStringPadINS_10StringRPadEE15execute_generalILb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEbS7_bRNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m Line | Count | Source | 975 | 186 | ColumnUInt8::Container& null_map_data, size_t input_rows_count) { | 976 | 186 | using PadChars = PaddingChars<true>; | 977 | 186 | std::optional<PadChars> const_padding; | 978 | 186 | const PadChars* const_padding_ptr = nullptr; | 979 | 186 | if (pad_const) { | 980 | 62 | auto pad = padcol.get_data_at(0); | 981 | 62 | if (!pad.empty()) { | 982 | 47 | const_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 983 | 47 | const_padding_ptr = &*const_padding; | 984 | 47 | } | 985 | 62 | } | 986 | | | 987 | 186 | res_chars.resize(strcol.get_chars().size()); | 988 | 186 | size_t dst_offset = 0; | 989 | 372 | for (size_t i = 0; i < input_rows_count; ++i) { | 990 | 186 | auto str = strcol.get_data_at(index_check_const<str_const>(i)); | 991 | 186 | const int target_len = col_len_data[len_const ? 0 : i]; | 992 | | | 993 | 186 | const PadChars* padding_ptr = const_padding_ptr; | 994 | 186 | std::optional<PadChars> row_padding; | 995 | 186 | if (!pad_const) { | 996 | 124 | auto pad = padcol.get_data_at(i); | 997 | 124 | if (!pad.empty()) { | 998 | 94 | row_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size); | 999 | 94 | padding_ptr = &*row_padding; | 1000 | 94 | } else { | 1001 | 30 | padding_ptr = nullptr; | 1002 | 30 | } | 1003 | 124 | } | 1004 | | | 1005 | 186 | append_result_row<true>(reinterpret_cast<const uint8_t*>(str.data), str.size, | 1006 | 186 | target_len, padding_ptr, res_chars, res_offsets, null_map_data, | 1007 | 186 | i, dst_offset); | 1008 | 186 | } | 1009 | 186 | res_chars.resize(dst_offset); | 1010 | 186 | } |
|
1011 | | }; |
1012 | | |
1013 | | #include "common/compile_check_avoid_end.h" |
1014 | | } // namespace doris |