Coverage Report

Created: 2026-05-15 02:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string_concat.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <fmt/format.h>
21
22
#include <cstddef>
23
#include <cstring>
24
#include <string>
25
#include <string_view>
26
#include <vector>
27
28
#include "common/status.h"
29
#include "core/assert_cast.h"
30
#include "core/block/block.h"
31
#include "core/block/column_numbers.h"
32
#include "core/column/column.h"
33
#include "core/column/column_array.h"
34
#include "core/column/column_const.h"
35
#include "core/column/column_nullable.h"
36
#include "core/column/column_string.h"
37
#include "core/column/column_vector.h"
38
#include "core/data_type/data_type_array.h"
39
#include "core/data_type/data_type_nullable.h"
40
#include "core/data_type/data_type_number.h"
41
#include "core/data_type/data_type_string.h"
42
#include "core/memcpy_small.h"
43
#include "core/string_ref.h"
44
#include "exec/common/stringop_substring.h"
45
#include "exec/common/template_helpers.hpp"
46
#include "exec/common/util.hpp"
47
#include "exprs/function/function.h"
48
#include "exprs/function/function_helpers.h"
49
#include "exprs/function_context.h"
50
#include "util/simd/vstring_function.h"
51
52
namespace doris {
53
#include "common/compile_check_avoid_begin.h"
54
55
class FunctionStringConcat : public IFunction {
56
public:
57
    struct ConcatState {
58
        bool use_state = false;
59
        std::string tail;
60
    };
61
62
    static constexpr auto name = "concat";
63
2.04k
    static FunctionPtr create() { return std::make_shared<FunctionStringConcat>(); }
64
0
    String get_name() const override { return name; }
65
0
    size_t get_number_of_arguments() const override { return 0; }
66
2.03k
    bool is_variadic() const override { return true; }
67
68
2.03k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
69
2.03k
        return std::make_shared<DataTypeString>();
70
2.03k
    }
71
72
6.01k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
73
6.01k
        if (scope == FunctionContext::THREAD_LOCAL) {
74
3.97k
            return Status::OK();
75
3.97k
        }
76
2.03k
        std::shared_ptr<ConcatState> state = std::make_shared<ConcatState>();
77
78
2.03k
        context->set_function_state(scope, state);
79
80
2.03k
        state->use_state = true;
81
82
        // Optimize function calls like this:
83
        // concat(col, "123", "abc", "456") -> tail = "123abc456"
84
3.91k
        for (size_t i = 1; i < context->get_num_args(); i++) {
85
3.65k
            const auto* column_string = context->get_constant_col(i);
86
3.65k
            if (column_string == nullptr) {
87
1.74k
                state->use_state = false;
88
1.74k
                return IFunction::open(context, scope);
89
1.74k
            }
90
1.91k
            auto string_vale = column_string->column_ptr->get_data_at(0);
91
1.91k
            if (string_vale.data == nullptr) {
92
                // For concat(col, null), it is handled by default_implementation_for_nulls
93
33
                state->use_state = false;
94
33
                return IFunction::open(context, scope);
95
33
            }
96
97
1.87k
            state->tail.append(string_vale.begin(), string_vale.size);
98
1.87k
        }
99
100
        // The reserve is used here to allow the usage of memcpy_small_allow_read_write_overflow15 below.
101
266
        state->tail.reserve(state->tail.size() + 16);
102
103
266
        return IFunction::open(context, scope);
104
2.03k
    }
105
106
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
107
1.94k
                        uint32_t result, size_t input_rows_count) const override {
108
1.94k
        DCHECK_GE(arguments.size(), 1);
109
110
1.94k
        if (arguments.size() == 1) {
111
7
            block.get_by_position(result).column = block.get_by_position(arguments[0]).column;
112
7
            return Status::OK();
113
7
        }
114
1.93k
        auto* concat_state = reinterpret_cast<ConcatState*>(
115
1.93k
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
116
1.93k
        if (!concat_state) {
117
0
            return Status::RuntimeError("funciton context for function '{}' must have ConcatState;",
118
0
                                        get_name());
119
0
        }
120
1.93k
        if (concat_state->use_state) {
121
246
            const auto& [col, is_const] =
122
246
                    unpack_if_const(block.get_by_position(arguments[0]).column);
123
246
            const auto* col_str = assert_cast<const ColumnString*>(col.get());
124
246
            if (is_const) {
125
0
                return execute_const<true>(concat_state, block, col_str, result, input_rows_count);
126
246
            } else {
127
246
                return execute_const<false>(concat_state, block, col_str, result, input_rows_count);
128
246
            }
129
130
1.68k
        } else {
131
1.68k
            return execute_vecotr(block, arguments, result, input_rows_count);
132
1.68k
        }
133
1.93k
    }
134
135
    Status execute_vecotr(Block& block, const ColumnNumbers& arguments, uint32_t result,
136
1.68k
                          size_t input_rows_count) const {
137
1.68k
        int argument_size = arguments.size();
138
1.68k
        std::vector<ColumnPtr> argument_columns(argument_size);
139
140
1.68k
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
141
1.68k
        std::vector<const ColumnString::Chars*> chars_list(argument_size);
142
1.68k
        std::vector<bool> is_const_args(argument_size);
143
144
6.64k
        for (int i = 0; i < argument_size; ++i) {
145
4.95k
            const auto& [col, is_const] =
146
4.95k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
147
148
4.95k
            const auto* col_str = assert_cast<const ColumnString*>(col.get());
149
4.95k
            offsets_list[i] = &col_str->get_offsets();
150
4.95k
            chars_list[i] = &col_str->get_chars();
151
4.95k
            is_const_args[i] = is_const;
152
4.95k
        }
153
154
1.68k
        auto res = ColumnString::create();
155
1.68k
        auto& res_data = res->get_chars();
156
1.68k
        auto& res_offset = res->get_offsets();
157
158
1.68k
        res_offset.resize(input_rows_count);
159
1.68k
        size_t res_reserve_size = 0;
160
6.64k
        for (size_t i = 0; i < argument_size; ++i) {
161
4.95k
            if (is_const_args[i]) {
162
1.69k
                res_reserve_size += (*offsets_list[i])[0] * input_rows_count;
163
3.25k
            } else {
164
3.25k
                res_reserve_size += (*offsets_list[i])[input_rows_count - 1];
165
3.25k
            }
166
4.95k
        }
167
168
1.68k
        ColumnString::check_chars_length(res_reserve_size, 0);
169
170
1.68k
        res_data.resize(res_reserve_size);
171
172
1.68k
        auto* data = res_data.data();
173
1.68k
        size_t dst_offset = 0;
174
175
69.9k
        for (size_t i = 0; i < input_rows_count; ++i) {
176
208k
            for (size_t j = 0; j < argument_size; ++j) {
177
140k
                const auto& current_offsets = *offsets_list[j];
178
140k
                const auto& current_chars = *chars_list[j];
179
140k
                auto idx = index_check_const(i, is_const_args[j]);
180
140k
                const auto size = current_offsets[idx] - current_offsets[idx - 1];
181
140k
                if (size > 0) {
182
140k
                    memcpy_small_allow_read_write_overflow15(
183
140k
                            data + dst_offset, current_chars.data() + current_offsets[idx - 1],
184
140k
                            size);
185
140k
                    dst_offset += size;
186
140k
                }
187
140k
            }
188
68.2k
            res_offset[i] = dst_offset;
189
68.2k
        }
190
191
1.68k
        block.get_by_position(result).column = std::move(res);
192
1.68k
        return Status::OK();
193
1.68k
    }
194
195
    template <bool is_const>
196
    Status execute_const(ConcatState* concat_state, Block& block, const ColumnString* col_str,
197
246
                         uint32_t result, size_t input_rows_count) const {
198
        // Tail optimization: each output row is the column value followed by the precomputed constant tail.
199
200
246
        auto res = ColumnString::create();
201
246
        auto& res_data = res->get_chars();
202
246
        auto& res_offset = res->get_offsets();
203
246
        res_offset.resize(input_rows_count);
204
205
246
        size_t res_reserve_size = 0;
206
246
        if constexpr (is_const) {
207
0
            res_reserve_size = col_str->get_offsets()[0] * input_rows_count;
208
246
        } else {
209
246
            res_reserve_size = col_str->get_offsets()[input_rows_count - 1];
210
246
        }
211
246
        res_reserve_size += concat_state->tail.size() * input_rows_count;
212
213
246
        ColumnString::check_chars_length(res_reserve_size, 0);
214
246
        res_data.resize(res_reserve_size);
215
216
246
        const auto& tail = concat_state->tail;
217
246
        auto* data = res_data.data();
218
246
        size_t dst_offset = 0;
219
220
525
        for (size_t i = 0; i < input_rows_count; ++i) {
221
279
            const auto idx = index_check_const<is_const>(i);
222
279
            StringRef str_val = col_str->get_data_at(idx);
223
            // copy column
224
279
            memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size);
225
279
            dst_offset += str_val.size;
226
            // copy tail
227
279
            memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size());
228
279
            dst_offset += tail.size();
229
279
            res_offset[i] = dst_offset;
230
279
        }
231
246
        block.get_by_position(result).column = std::move(res);
232
246
        return Status::OK();
233
246
    }
Unexecuted instantiation: _ZNK5doris20FunctionStringConcat13execute_constILb1EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm
_ZNK5doris20FunctionStringConcat13execute_constILb0EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm
Line
Count
Source
197
246
                         uint32_t result, size_t input_rows_count) const {
198
        // Tail optimization: each output row is the column value followed by the precomputed constant tail.
199
200
246
        auto res = ColumnString::create();
201
246
        auto& res_data = res->get_chars();
202
246
        auto& res_offset = res->get_offsets();
203
246
        res_offset.resize(input_rows_count);
204
205
246
        size_t res_reserve_size = 0;
206
        if constexpr (is_const) {
207
            res_reserve_size = col_str->get_offsets()[0] * input_rows_count;
208
246
        } else {
209
246
            res_reserve_size = col_str->get_offsets()[input_rows_count - 1];
210
246
        }
211
246
        res_reserve_size += concat_state->tail.size() * input_rows_count;
212
213
246
        ColumnString::check_chars_length(res_reserve_size, 0);
214
246
        res_data.resize(res_reserve_size);
215
216
246
        const auto& tail = concat_state->tail;
217
246
        auto* data = res_data.data();
218
246
        size_t dst_offset = 0;
219
220
525
        for (size_t i = 0; i < input_rows_count; ++i) {
221
279
            const auto idx = index_check_const<is_const>(i);
222
279
            StringRef str_val = col_str->get_data_at(idx);
223
            // copy column
224
279
            memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size);
225
279
            dst_offset += str_val.size;
226
            // copy tail
227
279
            memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size());
228
279
            dst_offset += tail.size();
229
279
            res_offset[i] = dst_offset;
230
279
        }
231
246
        block.get_by_position(result).column = std::move(res);
232
246
        return Status::OK();
233
246
    }
234
};
235
236
class FunctionStringElt : public IFunction {
237
public:
238
    static constexpr auto name = "elt";
239
419
    static FunctionPtr create() { return std::make_shared<FunctionStringElt>(); }
240
0
    String get_name() const override { return name; }
241
0
    size_t get_number_of_arguments() const override { return 0; }
242
411
    bool is_variadic() const override { return true; }
243
244
410
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
245
410
        return make_nullable(std::make_shared<DataTypeString>());
246
410
    }
247
820
    bool use_default_implementation_for_nulls() const override { return false; }
248
249
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
250
410
                        uint32_t result, size_t input_rows_count) const override {
251
410
        int arguent_size = arguments.size();
252
410
        int num_children = arguent_size - 1;
253
410
        auto res = ColumnString::create();
254
255
410
        if (auto const_column = check_and_get_column<ColumnConst>(
256
410
                    *block.get_by_position(arguments[0]).column)) {
257
154
            auto data = const_column->get_data_at(0);
258
            // Return NULL when pos is NULL, pos <= 0, or pos > num_children.
259
154
            auto is_null = data.data == nullptr;
260
154
            auto pos = is_null ? 0 : *(Int32*)data.data;
261
154
            is_null = pos <= 0 || pos > num_children;
262
263
154
            auto null_map = ColumnUInt8::create(input_rows_count, is_null);
264
154
            if (is_null) {
265
135
                res->insert_many_defaults(input_rows_count);
266
135
            } else {
267
19
                auto& target_column = block.get_by_position(arguments[pos]).column;
268
19
                if (auto target_const_column = check_and_get_column<ColumnConst>(*target_column)) {
269
7
                    auto target_data = target_const_column->get_data_at(0);
270
                    // Return NULL when the constant target argument is itself NULL.
271
7
                    if (target_data.data == nullptr) {
272
1
                        null_map = ColumnUInt8::create(input_rows_count, true);
273
1
                        res->insert_many_defaults(input_rows_count);
274
6
                    } else {
275
6
                        res->insert_data_repeatedly(target_data.data, target_data.size,
276
6
                                                    input_rows_count);
277
6
                    }
278
12
                } else if (auto target_nullable_column =
279
12
                                   check_and_get_column<ColumnNullable>(*target_column)) {
280
12
                    auto& target_null_map = target_nullable_column->get_null_map_data();
281
12
                    VectorizedUtils::update_null_map(null_map->get_data(), target_null_map);
282
283
12
                    auto& target_str_column = assert_cast<const ColumnString&>(
284
12
                            target_nullable_column->get_nested_column());
285
12
                    res->get_chars().assign(target_str_column.get_chars().begin(),
286
12
                                            target_str_column.get_chars().end());
287
12
                    res->get_offsets().assign(target_str_column.get_offsets().begin(),
288
12
                                              target_str_column.get_offsets().end());
289
12
                } else {
290
0
                    auto& target_str_column = assert_cast<const ColumnString&>(*target_column);
291
0
                    res->get_chars().assign(target_str_column.get_chars().begin(),
292
0
                                            target_str_column.get_chars().end());
293
0
                    res->get_offsets().assign(target_str_column.get_offsets().begin(),
294
0
                                              target_str_column.get_offsets().end());
295
0
                }
296
19
            }
297
154
            block.get_by_position(result).column =
298
154
                    ColumnNullable::create(std::move(res), std::move(null_map));
299
256
        } else if (auto pos_null_column = check_and_get_column<ColumnNullable>(
300
256
                           *block.get_by_position(arguments[0]).column)) {
301
211
            auto& pos_column =
302
211
                    assert_cast<const ColumnInt32&>(pos_null_column->get_nested_column());
303
211
            auto& pos_null_map = pos_null_column->get_null_map_data();
304
211
            auto null_map = ColumnUInt8::create(input_rows_count, false);
305
211
            auto& res_null_map = null_map->get_data();
306
307
481
            for (size_t i = 0; i < input_rows_count; ++i) {
308
270
                auto pos = pos_column.get_element(i);
309
270
                res_null_map[i] =
310
270
                        pos_null_map[i] || pos <= 0 || pos > num_children ||
311
270
                        block.get_by_position(arguments[pos]).column->get_data_at(i).data ==
312
33
                                nullptr;
313
270
                if (res_null_map[i]) {
314
237
                    res->insert_default();
315
237
                } else {
316
33
                    auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i);
317
33
                    res->insert_data(insert_data.data, insert_data.size);
318
33
                }
319
270
            }
320
211
            block.get_by_position(result).column =
321
211
                    ColumnNullable::create(std::move(res), std::move(null_map));
322
211
        } else {
323
45
            auto& pos_column =
324
45
                    assert_cast<const ColumnInt32&>(*block.get_by_position(arguments[0]).column);
325
45
            auto null_map = ColumnUInt8::create(input_rows_count, false);
326
45
            auto& res_null_map = null_map->get_data();
327
328
90
            for (size_t i = 0; i < input_rows_count; ++i) {
329
45
                auto pos = pos_column.get_element(i);
330
45
                res_null_map[i] =
331
45
                        pos <= 0 || pos > num_children ||
332
45
                        block.get_by_position(arguments[pos]).column->get_data_at(i).data ==
333
21
                                nullptr;
334
45
                if (res_null_map[i]) {
335
24
                    res->insert_default();
336
24
                } else {
337
21
                    auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i);
338
21
                    res->insert_data(insert_data.data, insert_data.size);
339
21
                }
340
45
            }
341
45
            block.get_by_position(result).column =
342
45
                    ColumnNullable::create(std::move(res), std::move(null_map));
343
45
        }
344
410
        return Status::OK();
345
410
    }
346
};
347
348
// concat_ws (string,string....) or (string, Array)
349
// TODO: avoid using fmtlib
350
class FunctionStringConcatWs : public IFunction {
351
public:
352
    using Chars = ColumnString::Chars;
353
    using Offsets = ColumnString::Offsets;
354
355
    static constexpr auto name = "concat_ws";
356
520
    static FunctionPtr create() { return std::make_shared<FunctionStringConcatWs>(); }
357
0
    String get_name() const override { return name; }
358
0
    size_t get_number_of_arguments() const override { return 0; }
359
512
    bool is_variadic() const override { return true; }
360
361
511
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
362
511
        const IDataType* first_type = arguments[0].get();
363
511
        if (first_type->is_nullable()) {
364
458
            return make_nullable(std::make_shared<DataTypeString>());
365
458
        } else {
366
53
            return std::make_shared<DataTypeString>();
367
53
        }
368
511
    }
369
1.02k
    bool use_default_implementation_for_nulls() const override { return false; }
370
371
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
372
511
                        uint32_t result, size_t input_rows_count) const override {
373
511
        DCHECK_GE(arguments.size(), 2);
374
511
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
375
        // An all-zero null map shared by every non-nullable argument, to keep the implementation simple.
376
511
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
377
511
        auto res = ColumnString::create();
378
511
        bool is_null_type = block.get_by_position(arguments[0]).type.get()->is_nullable();
379
511
        size_t argument_size = arguments.size();
380
511
        std::vector<const Offsets*> offsets_list(argument_size);
381
511
        std::vector<const Chars*> chars_list(argument_size);
382
511
        std::vector<const ColumnUInt8::Container*> null_list(argument_size);
383
384
511
        std::vector<ColumnPtr> argument_columns(argument_size);
385
511
        std::vector<ColumnPtr> argument_null_columns(argument_size);
386
387
1.78k
        for (size_t i = 0; i < argument_size; ++i) {
388
1.27k
            argument_columns[i] =
389
1.27k
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
390
1.27k
            if (const auto* nullable =
391
1.27k
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
392
                // Danger: the null map data must be captured first, because the assignment
393
                // argument_columns[i] = nullable->get_nested_column_ptr(); will release the
394
                // nullable column, and with it the memory of its null map.
395
1.12k
                null_list[i] = &nullable->get_null_map_data();
396
1.12k
                argument_null_columns[i] = nullable->get_null_map_column_ptr();
397
1.12k
                argument_columns[i] = nullable->get_nested_column_ptr();
398
1.12k
            } else {
399
150
                null_list[i] = &const_null_map->get_data();
400
150
            }
401
402
1.27k
            if (is_column<ColumnArray>(argument_columns[i].get())) {
403
62
                continue;
404
62
            }
405
406
1.21k
            const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
407
1.21k
            offsets_list[i] = &col_str->get_offsets();
408
1.21k
            chars_list[i] = &col_str->get_chars();
409
1.21k
        }
410
411
511
        auto& res_data = res->get_chars();
412
511
        auto& res_offset = res->get_offsets();
413
511
        res_offset.resize(input_rows_count);
414
415
511
        VectorizedUtils::update_null_map(null_map->get_data(), *null_list[0]);
416
511
        fmt::memory_buffer buffer;
417
511
        std::vector<std::string_view> views;
418
419
511
        if (is_column<ColumnArray>(argument_columns[1].get())) {
420
            // Determine if the nested type of the array is String
421
62
            const auto& array_column = reinterpret_cast<const ColumnArray&>(*argument_columns[1]);
422
62
            if (!array_column.get_data().is_column_string()) {
423
0
                return Status::NotSupported(
424
0
                        fmt::format("unsupported nested array of type {} for function {}",
425
0
                                    is_column_nullable(array_column.get_data())
426
0
                                            ? array_column.get_data().get_name()
427
0
                                            : array_column.get_data().get_name(),
428
0
                                    get_name()));
429
0
            }
430
            // Concat string in array
431
62
            _execute_array(input_rows_count, array_column, buffer, views, offsets_list, chars_list,
432
62
                           null_list, res_data, res_offset);
433
434
449
        } else {
435
            // Concat string
436
449
            _execute_string(input_rows_count, argument_size, buffer, views, offsets_list,
437
449
                            chars_list, null_list, res_data, res_offset);
438
449
        }
439
511
        if (is_null_type) {
440
458
            block.get_by_position(result).column =
441
458
                    ColumnNullable::create(std::move(res), std::move(null_map));
442
458
        } else {
443
53
            block.get_by_position(result).column = std::move(res);
444
53
        }
445
511
        return Status::OK();
446
511
    }
447
448
private:
449
    void _execute_array(const size_t& input_rows_count, const ColumnArray& array_column,
450
                        fmt::memory_buffer& buffer, std::vector<std::string_view>& views,
451
                        const std::vector<const Offsets*>& offsets_list,
452
                        const std::vector<const Chars*>& chars_list,
453
                        const std::vector<const ColumnUInt8::Container*>& null_list,
454
62
                        Chars& res_data, Offsets& res_offset) const {
455
        // Get array nested column
456
62
        const UInt8* array_nested_null_map = nullptr;
457
62
        ColumnPtr array_nested_column = nullptr;
458
459
62
        if (is_column_nullable(array_column.get_data())) {
460
62
            const auto& array_nested_null_column =
461
62
                    reinterpret_cast<const ColumnNullable&>(array_column.get_data());
462
            // String's null map in array
463
62
            array_nested_null_map =
464
62
                    array_nested_null_column.get_null_map_column().get_data().data();
465
62
            array_nested_column = array_nested_null_column.get_nested_column_ptr();
466
62
        } else {
467
0
            array_nested_column = array_column.get_data_ptr();
468
0
        }
469
470
62
        const auto& string_column = reinterpret_cast<const ColumnString&>(*array_nested_column);
471
62
        const Chars& string_src_chars = string_column.get_chars();
472
62
        const auto& src_string_offsets = string_column.get_offsets();
473
62
        const auto& src_array_offsets = array_column.get_offsets();
474
62
        size_t current_src_array_offset = 0;
475
476
        // Concat string in array
477
128
        for (size_t i = 0; i < input_rows_count; ++i) {
478
66
            auto& sep_offsets = *offsets_list[0];
479
66
            auto& sep_chars = *chars_list[0];
480
66
            auto& sep_nullmap = *null_list[0];
481
482
66
            if (sep_nullmap[i]) {
483
11
                res_offset[i] = res_data.size();
484
11
                current_src_array_offset += src_array_offsets[i] - src_array_offsets[i - 1];
485
11
                continue;
486
11
            }
487
488
55
            int sep_size = sep_offsets[i] - sep_offsets[i - 1];
489
55
            const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]);
490
491
55
            std::string_view sep(sep_data, sep_size);
492
55
            buffer.clear();
493
55
            views.clear();
494
495
55
            for (auto next_src_array_offset = src_array_offsets[i];
496
209
                 current_src_array_offset < next_src_array_offset; ++current_src_array_offset) {
497
154
                const auto current_src_string_offset =
498
154
                        current_src_array_offset ? src_string_offsets[current_src_array_offset - 1]
499
154
                                                 : 0;
500
154
                size_t bytes_to_copy =
501
154
                        src_string_offsets[current_src_array_offset] - current_src_string_offset;
502
154
                const char* ptr =
503
154
                        reinterpret_cast<const char*>(&string_src_chars[current_src_string_offset]);
504
505
154
                if (array_nested_null_map == nullptr ||
506
154
                    !array_nested_null_map[current_src_array_offset]) {
507
147
                    views.emplace_back(ptr, bytes_to_copy);
508
147
                }
509
154
            }
510
511
55
            fmt::format_to(buffer, "{}", fmt::join(views, sep));
512
513
55
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
514
55
                                        res_offset);
515
55
        }
516
62
    }
517
518
    void _execute_string(const size_t& input_rows_count, const size_t& argument_size,
519
                         fmt::memory_buffer& buffer, std::vector<std::string_view>& views,
520
                         const std::vector<const Offsets*>& offsets_list,
521
                         const std::vector<const Chars*>& chars_list,
522
                         const std::vector<const ColumnUInt8::Container*>& null_list,
523
449
                         Chars& res_data, Offsets& res_offset) const {
524
        // Concat string
525
1.01k
        for (size_t i = 0; i < input_rows_count; ++i) {
526
570
            auto& sep_offsets = *offsets_list[0];
527
570
            auto& sep_chars = *chars_list[0];
528
570
            auto& sep_nullmap = *null_list[0];
529
570
            if (sep_nullmap[i]) {
530
78
                res_offset[i] = res_data.size();
531
78
                continue;
532
78
            }
533
534
492
            int sep_size = sep_offsets[i] - sep_offsets[i - 1];
535
492
            const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]);
536
537
492
            std::string_view sep(sep_data, sep_size);
538
492
            buffer.clear();
539
492
            views.clear();
540
1.20k
            for (size_t j = 1; j < argument_size; ++j) {
541
714
                auto& current_offsets = *offsets_list[j];
542
714
                auto& current_chars = *chars_list[j];
543
714
                auto& current_nullmap = *null_list[j];
544
714
                int size = current_offsets[i] - current_offsets[i - 1];
545
714
                const char* ptr =
546
714
                        reinterpret_cast<const char*>(&current_chars[current_offsets[i - 1]]);
547
714
                if (!current_nullmap[i]) {
548
638
                    views.emplace_back(ptr, size);
549
638
                }
550
714
            }
551
492
            fmt::format_to(buffer, "{}", fmt::join(views, sep));
552
492
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
553
492
                                        res_offset);
554
492
        }
555
449
    }
556
};
557
558
class FunctionStringRepeat : public IFunction {
559
public:
560
    static constexpr auto name = "repeat";
561
222
    static FunctionPtr create() { return std::make_shared<FunctionStringRepeat>(); }
562
1
    String get_name() const override { return name; }
563
213
    size_t get_number_of_arguments() const override { return 2; }
564
    // should set NULL value of nested data to default,
565
    // as iff it's not inited and invalid, the repeat result of length is so large cause overflow
566
163
    bool need_replace_null_data_to_default() const override { return true; }
567
568
213
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
569
213
        return make_nullable(std::make_shared<DataTypeString>());
570
213
    }
571
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
572
190
                        uint32_t result, size_t input_rows_count) const override {
573
190
        DCHECK_EQ(arguments.size(), 2);
574
190
        auto res = ColumnString::create();
575
190
        auto null_map = ColumnUInt8::create();
576
577
190
        ColumnPtr argument_ptr[2];
578
190
        argument_ptr[0] =
579
190
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
580
190
        argument_ptr[1] = block.get_by_position(arguments[1]).column;
581
582
190
        if (const auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) {
583
190
            if (const auto* col2 = check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
584
134
                RETURN_IF_ERROR(vector_vector(col1->get_chars(), col1->get_offsets(),
585
134
                                              col2->get_data(), res->get_chars(),
586
134
                                              res->get_offsets(), null_map->get_data()));
587
134
                block.replace_by_position(
588
134
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
589
134
                return Status::OK();
590
134
            } else if (const auto* col2_const =
591
56
                               check_and_get_column<ColumnConst>(*argument_ptr[1])) {
592
56
                DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column()));
593
56
                int repeat = col2_const->get_int(0);
594
56
                if (repeat <= 0) {
595
18
                    null_map->get_data().resize_fill(input_rows_count, 0);
596
18
                    res->insert_many_defaults(input_rows_count);
597
38
                } else {
598
38
                    vector_const(col1->get_chars(), col1->get_offsets(), repeat, res->get_chars(),
599
38
                                 res->get_offsets(), null_map->get_data());
600
38
                }
601
56
                block.replace_by_position(
602
56
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
603
56
                return Status::OK();
604
56
            }
605
190
        }
606
607
0
        return Status::RuntimeError("repeat function get error param: {}, {}",
608
0
                                    argument_ptr[0]->get_name(), argument_ptr[1]->get_name());
609
190
    }
610
611
    Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
612
                         const ColumnInt32::Container& repeats, ColumnString::Chars& res_data,
613
                         ColumnString::Offsets& res_offsets,
614
134
                         ColumnUInt8::Container& null_map) const {
615
134
        size_t input_row_size = offsets.size();
616
617
134
        fmt::memory_buffer buffer;
618
134
        res_offsets.resize(input_row_size);
619
134
        null_map.resize_fill(input_row_size, 0);
620
327
        for (ssize_t i = 0; i < input_row_size; ++i) {
621
193
            buffer.clear();
622
193
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
623
193
            size_t size = offsets[i] - offsets[i - 1];
624
193
            int repeat = repeats[i];
625
193
            if (repeat <= 0) {
626
64
                StringOP::push_empty_string(i, res_data, res_offsets);
627
129
            } else {
628
129
                ColumnString::check_chars_length(repeat * size + res_data.size(), 0);
629
723
                for (int j = 0; j < repeat; ++j) {
630
594
                    buffer.append(raw_str, raw_str + size);
631
594
                }
632
129
                StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i,
633
129
                                            res_data, res_offsets);
634
129
            }
635
193
        }
636
134
        return Status::OK();
637
134
    }
638
639
    // TODO: 1. use pmr::vector<char> replace fmt_buffer may speed up the code
640
    //       2. abstract the `vector_vector` and `vector_const`
641
    //       3. rethink we should use `DEFAULT_MAX_STRING_SIZE` to bigger here
642
    void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
643
                      int repeat, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
644
38
                      ColumnUInt8::Container& null_map) const {
645
38
        size_t input_row_size = offsets.size();
646
647
38
        fmt::memory_buffer buffer;
648
38
        res_offsets.resize(input_row_size);
649
38
        null_map.resize_fill(input_row_size, 0);
650
100
        for (ssize_t i = 0; i < input_row_size; ++i) {
651
62
            buffer.clear();
652
62
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
653
62
            size_t size = offsets[i] - offsets[i - 1];
654
62
            ColumnString::check_chars_length(repeat * size + res_data.size(), 0);
655
656
285
            for (int j = 0; j < repeat; ++j) {
657
223
                buffer.append(raw_str, raw_str + size);
658
223
            }
659
62
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
660
62
                                        res_offsets);
661
62
        }
662
38
    }
663
};
664
665
/// PaddingChars pre-processes the pad string for efficient padding.
666
/// When is_utf8=false, character count equals byte count — no UTF-8 decoding needed.
667
/// When is_utf8=true, we build a byte-offset table for code points.
668
/// In both cases, the pad string is pre-expanded (doubled) until it has >= 16 characters,
669
/// so that each memcpy in append_to copies at least 16 bytes at a time.
670
template <bool is_utf8>
671
struct PaddingChars {
672
    std::string pad_string;
673
    /// utf8_byte_offsets[i] = byte offset of i-th code point in pad_string.
674
    /// utf8_byte_offsets has (num_chars + 1) entries, with [0]=0 and [num_chars]=pad_string.size().
675
    std::vector<size_t> utf8_byte_offsets;
676
677
    explicit PaddingChars(const uint8_t* data, size_t len)
678
1.20k
            : pad_string(reinterpret_cast<const char*>(data), len) {
679
1.20k
        init();
680
1.20k
    }
_ZN5doris12PaddingCharsILb0EEC2EPKhm
Line
Count
Source
678
56
            : pad_string(reinterpret_cast<const char*>(data), len) {
679
56
        init();
680
56
    }
_ZN5doris12PaddingCharsILb1EEC2EPKhm
Line
Count
Source
678
1.14k
            : pad_string(reinterpret_cast<const char*>(data), len) {
679
1.14k
        init();
680
1.14k
    }
681
682
14.0k
    size_t num_chars() const {
683
14.0k
        if constexpr (is_utf8) {
684
5.63k
            return utf8_byte_offsets.size() - 1;
685
8.41k
        } else {
686
8.41k
            return pad_string.size();
687
8.41k
        }
688
14.0k
    }
_ZNK5doris12PaddingCharsILb0EE9num_charsEv
Line
Count
Source
682
8.41k
    size_t num_chars() const {
683
        if constexpr (is_utf8) {
684
            return utf8_byte_offsets.size() - 1;
685
8.41k
        } else {
686
8.41k
            return pad_string.size();
687
8.41k
        }
688
8.41k
    }
_ZNK5doris12PaddingCharsILb1EE9num_charsEv
Line
Count
Source
682
5.63k
    size_t num_chars() const {
683
5.63k
        if constexpr (is_utf8) {
684
5.63k
            return utf8_byte_offsets.size() - 1;
685
        } else {
686
            return pad_string.size();
687
        }
688
5.63k
    }
689
690
23.8k
    size_t chars_to_bytes(size_t n) const {
691
23.8k
        if constexpr (is_utf8) {
692
19.7k
            return utf8_byte_offsets[n];
693
19.7k
        } else {
694
4.13k
            return n;
695
4.13k
        }
696
23.8k
    }
_ZNK5doris12PaddingCharsILb0EE14chars_to_bytesEm
Line
Count
Source
690
4.13k
    size_t chars_to_bytes(size_t n) const {
691
        if constexpr (is_utf8) {
692
            return utf8_byte_offsets[n];
693
4.13k
        } else {
694
4.13k
            return n;
695
4.13k
        }
696
4.13k
    }
_ZNK5doris12PaddingCharsILb1EE14chars_to_bytesEm
Line
Count
Source
690
19.7k
    size_t chars_to_bytes(size_t n) const {
691
19.7k
        if constexpr (is_utf8) {
692
19.7k
            return utf8_byte_offsets[n];
693
        } else {
694
            return n;
695
        }
696
19.7k
    }
697
698
    /// Append `num_chars_to_pad` padding characters to dst, return bytes written.
699
2.47k
    size_t append_to(uint8_t* dst, size_t num_chars_to_pad) const {
700
2.47k
        if (num_chars_to_pad == 0) {
701
0
            return 0;
702
0
        }
703
2.47k
        const auto* src = reinterpret_cast<const uint8_t*>(pad_string.data());
704
2.47k
        const size_t step = num_chars();
705
2.47k
        uint8_t* dst_start = dst;
706
21.3k
        while (num_chars_to_pad > step) {
707
18.9k
            size_t bytes = chars_to_bytes(step);
708
18.9k
            memcpy(dst, src, bytes);
709
18.9k
            dst += bytes;
710
18.9k
            num_chars_to_pad -= step;
711
18.9k
        }
712
2.47k
        size_t bytes = chars_to_bytes(num_chars_to_pad);
713
2.47k
        memcpy(dst, src, bytes);
714
2.47k
        dst += bytes;
715
2.47k
        return dst - dst_start;
716
2.47k
    }
_ZNK5doris12PaddingCharsILb0EE9append_toEPhm
Line
Count
Source
699
2.06k
    size_t append_to(uint8_t* dst, size_t num_chars_to_pad) const {
700
2.06k
        if (num_chars_to_pad == 0) {
701
0
            return 0;
702
0
        }
703
2.06k
        const auto* src = reinterpret_cast<const uint8_t*>(pad_string.data());
704
2.06k
        const size_t step = num_chars();
705
2.06k
        uint8_t* dst_start = dst;
706
2.06k
        while (num_chars_to_pad > step) {
707
0
            size_t bytes = chars_to_bytes(step);
708
0
            memcpy(dst, src, bytes);
709
0
            dst += bytes;
710
0
            num_chars_to_pad -= step;
711
0
        }
712
2.06k
        size_t bytes = chars_to_bytes(num_chars_to_pad);
713
2.06k
        memcpy(dst, src, bytes);
714
2.06k
        dst += bytes;
715
2.06k
        return dst - dst_start;
716
2.06k
    }
_ZNK5doris12PaddingCharsILb1EE9append_toEPhm
Line
Count
Source
699
403
    size_t append_to(uint8_t* dst, size_t num_chars_to_pad) const {
700
403
        if (num_chars_to_pad == 0) {
701
0
            return 0;
702
0
        }
703
403
        const auto* src = reinterpret_cast<const uint8_t*>(pad_string.data());
704
403
        const size_t step = num_chars();
705
403
        uint8_t* dst_start = dst;
706
19.3k
        while (num_chars_to_pad > step) {
707
18.9k
            size_t bytes = chars_to_bytes(step);
708
18.9k
            memcpy(dst, src, bytes);
709
18.9k
            dst += bytes;
710
18.9k
            num_chars_to_pad -= step;
711
18.9k
        }
712
403
        size_t bytes = chars_to_bytes(num_chars_to_pad);
713
403
        memcpy(dst, src, bytes);
714
403
        dst += bytes;
715
403
        return dst - dst_start;
716
403
    }
717
718
private:
719
1.20k
    void init() {
720
1.20k
        if (pad_string.empty()) {
721
0
            return;
722
0
        }
723
724
1.20k
        if constexpr (is_utf8) {
725
            // Build byte-offset table for each code point.
726
1.14k
            size_t offset = 0;
727
1.14k
            utf8_byte_offsets.reserve(pad_string.size() + 1);
728
7.62k
            while (offset < pad_string.size()) {
729
6.48k
                utf8_byte_offsets.push_back(offset);
730
6.48k
                offset += get_utf8_byte_length(static_cast<uint8_t>(pad_string[offset]));
731
6.48k
                offset = std::min(offset, pad_string.size());
732
6.48k
            }
733
1.14k
            utf8_byte_offsets.push_back(pad_string.size());
734
1.14k
        }
735
736
        // Pre-expand pad_string until it has >= 16 characters.
737
        // This ensures append_to() copies at least 16 bytes per iteration.
738
4.13k
        while (num_chars() < 16) {
739
2.93k
            if constexpr (is_utf8) {
740
2.84k
                size_t old_count = utf8_byte_offsets.size();
741
2.84k
                size_t base = utf8_byte_offsets.back();
742
19.7k
                for (size_t i = 1; i < old_count; ++i) {
743
16.8k
                    utf8_byte_offsets.push_back(utf8_byte_offsets[i] + base);
744
16.8k
                }
745
2.84k
            }
746
2.93k
            pad_string += pad_string;
747
2.93k
        }
748
1.20k
    }
_ZN5doris12PaddingCharsILb0EE4initEv
Line
Count
Source
719
56
    void init() {
720
56
        if (pad_string.empty()) {
721
0
            return;
722
0
        }
723
724
        if constexpr (is_utf8) {
725
            // Build byte-offset table for each code point.
726
            size_t offset = 0;
727
            utf8_byte_offsets.reserve(pad_string.size() + 1);
728
            while (offset < pad_string.size()) {
729
                utf8_byte_offsets.push_back(offset);
730
                offset += get_utf8_byte_length(static_cast<uint8_t>(pad_string[offset]));
731
                offset = std::min(offset, pad_string.size());
732
            }
733
            utf8_byte_offsets.push_back(pad_string.size());
734
        }
735
736
        // Pre-expand pad_string until it has >= 16 characters.
737
        // This ensures append_to() copies at least 16 bytes per iteration.
738
139
        while (num_chars() < 16) {
739
            if constexpr (is_utf8) {
740
                size_t old_count = utf8_byte_offsets.size();
741
                size_t base = utf8_byte_offsets.back();
742
                for (size_t i = 1; i < old_count; ++i) {
743
                    utf8_byte_offsets.push_back(utf8_byte_offsets[i] + base);
744
                }
745
            }
746
83
            pad_string += pad_string;
747
83
        }
748
56
    }
_ZN5doris12PaddingCharsILb1EE4initEv
Line
Count
Source
719
1.14k
    void init() {
720
1.14k
        if (pad_string.empty()) {
721
0
            return;
722
0
        }
723
724
1.14k
        if constexpr (is_utf8) {
725
            // Build byte-offset table for each code point.
726
1.14k
            size_t offset = 0;
727
1.14k
            utf8_byte_offsets.reserve(pad_string.size() + 1);
728
7.62k
            while (offset < pad_string.size()) {
729
6.48k
                utf8_byte_offsets.push_back(offset);
730
6.48k
                offset += get_utf8_byte_length(static_cast<uint8_t>(pad_string[offset]));
731
6.48k
                offset = std::min(offset, pad_string.size());
732
6.48k
            }
733
1.14k
            utf8_byte_offsets.push_back(pad_string.size());
734
1.14k
        }
735
736
        // Pre-expand pad_string until it has >= 16 characters.
737
        // This ensures append_to() copies at least 16 bytes per iteration.
738
3.99k
        while (num_chars() < 16) {
739
2.84k
            if constexpr (is_utf8) {
740
2.84k
                size_t old_count = utf8_byte_offsets.size();
741
2.84k
                size_t base = utf8_byte_offsets.back();
742
19.7k
                for (size_t i = 1; i < old_count; ++i) {
743
16.8k
                    utf8_byte_offsets.push_back(utf8_byte_offsets[i] + base);
744
16.8k
                }
745
2.84k
            }
746
2.84k
            pad_string += pad_string;
747
2.84k
        }
748
1.14k
    }
749
};
750
751
template <typename Impl>
752
class FunctionStringPad : public IFunction {
753
public:
754
    static constexpr auto name = Impl::name;
755
1.84k
    static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }
_ZN5doris17FunctionStringPadINS_10StringLPadEE6createEv
Line
Count
Source
755
1.08k
    static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }
_ZN5doris17FunctionStringPadINS_10StringRPadEE6createEv
Line
Count
Source
755
767
    static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }
756
2
    String get_name() const override { return name; }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE8get_nameB5cxx11Ev
Line
Count
Source
756
1
    String get_name() const override { return name; }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE8get_nameB5cxx11Ev
Line
Count
Source
756
1
    String get_name() const override { return name; }
757
1.82k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE23get_number_of_argumentsEv
Line
Count
Source
757
1.07k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE23get_number_of_argumentsEv
Line
Count
Source
757
758
    size_t get_number_of_arguments() const override { return 3; }
758
759
1.82k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
760
1.82k
        return make_nullable(std::make_shared<DataTypeString>());
761
1.82k
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
759
1.07k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
760
1.07k
        return make_nullable(std::make_shared<DataTypeString>());
761
1.07k
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
759
758
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
760
758
        return make_nullable(std::make_shared<DataTypeString>());
761
758
    }
762
763
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
764
1.32k
                        uint32_t result, size_t input_rows_count) const override {
765
1.32k
        DCHECK_GE(arguments.size(), 3);
766
1.32k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
767
1.32k
        auto res = ColumnString::create();
768
769
1.32k
        ColumnPtr col[3];
770
1.32k
        bool col_const[3];
771
5.30k
        for (size_t i = 0; i < 3; ++i) {
772
3.97k
            std::tie(col[i], col_const[i]) =
773
3.97k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
774
3.97k
        }
775
1.32k
        auto& null_map_data = null_map->get_data();
776
1.32k
        auto& res_offsets = res->get_offsets();
777
1.32k
        auto& res_chars = res->get_chars();
778
1.32k
        res_offsets.resize(input_rows_count);
779
780
1.32k
        const auto* strcol = assert_cast<const ColumnString*>(col[0].get());
781
1.32k
        const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get());
782
1.32k
        const auto& col_len_data = col_len->get_data();
783
784
1.32k
        const auto* padcol = assert_cast<const ColumnString*>(col[2].get());
785
786
1.32k
        if (col_const[1] && col_const[2]) {
787
133
            auto pad = padcol->get_data_at(0);
788
133
            const bool pad_all_ascii =
789
133
                    simd::VStringFunctions::is_ascii({pad.data, static_cast<size_t>(pad.size)});
790
133
            const bool all_ascii = pad_all_ascii && strcol->is_ascii();
791
133
            std::visit(
792
133
                    [&](auto str_const) {
793
133
                        if (all_ascii) {
794
78
                            execute_const_len_const_pad<true, str_const>(
795
78
                                    *strcol, col_len_data, *padcol, res_offsets, res_chars,
796
78
                                    null_map_data, input_rows_count);
797
78
                        } else {
798
55
                            execute_const_len_const_pad<false, str_const>(
799
55
                                    *strcol, col_len_data, *padcol, res_offsets, res_chars,
800
55
                                    null_map_data, input_rows_count);
801
55
                        }
802
133
                    },
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E_clISt17integral_constantIbLb0EEEEDaSC_
Line
Count
Source
792
66
                    [&](auto str_const) {
793
66
                        if (all_ascii) {
794
39
                            execute_const_len_const_pad<true, str_const>(
795
39
                                    *strcol, col_len_data, *padcol, res_offsets, res_chars,
796
39
                                    null_map_data, input_rows_count);
797
39
                        } else {
798
27
                            execute_const_len_const_pad<false, str_const>(
799
27
                                    *strcol, col_len_data, *padcol, res_offsets, res_chars,
800
27
                                    null_map_data, input_rows_count);
801
27
                        }
802
66
                    },
Unexecuted instantiation: _ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E_clISt17integral_constantIbLb1EEEEDaSC_
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E_clISt17integral_constantIbLb0EEEEDaSC_
Line
Count
Source
792
67
                    [&](auto str_const) {
793
67
                        if (all_ascii) {
794
39
                            execute_const_len_const_pad<true, str_const>(
795
39
                                    *strcol, col_len_data, *padcol, res_offsets, res_chars,
796
39
                                    null_map_data, input_rows_count);
797
39
                        } else {
798
28
                            execute_const_len_const_pad<false, str_const>(
799
28
                                    *strcol, col_len_data, *padcol, res_offsets, res_chars,
800
28
                                    null_map_data, input_rows_count);
801
28
                        }
802
67
                    },
Unexecuted instantiation: _ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E_clISt17integral_constantIbLb1EEEEDaSC_
803
133
                    make_bool_variant(col_const[0]));
804
1.19k
        } else {
805
1.19k
            std::visit(
806
1.19k
                    [&](auto str_const) {
807
1.19k
                        execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol,
808
1.19k
                                                   col_const[2], res_offsets, res_chars,
809
1.19k
                                                   null_map_data, input_rows_count);
810
1.19k
                    },
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E0_clISt17integral_constantIbLb0EEEEDaSC_
Line
Count
Source
806
563
                    [&](auto str_const) {
807
563
                        execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol,
808
563
                                                   col_const[2], res_offsets, res_chars,
809
563
                                                   null_map_data, input_rows_count);
810
563
                    },
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E0_clISt17integral_constantIbLb1EEEEDaSC_
Line
Count
Source
806
186
                    [&](auto str_const) {
807
186
                        execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol,
808
186
                                                   col_const[2], res_offsets, res_chars,
809
186
                                                   null_map_data, input_rows_count);
810
186
                    },
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E0_clISt17integral_constantIbLb0EEEEDaSC_
Line
Count
Source
806
257
                    [&](auto str_const) {
807
257
                        execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol,
808
257
                                                   col_const[2], res_offsets, res_chars,
809
257
                                                   null_map_data, input_rows_count);
810
257
                    },
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_E0_clISt17integral_constantIbLb1EEEEDaSC_
Line
Count
Source
806
186
                    [&](auto str_const) {
807
186
                        execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol,
808
186
                                                   col_const[2], res_offsets, res_chars,
809
186
                                                   null_map_data, input_rows_count);
810
186
                    },
811
1.19k
                    make_bool_variant(col_const[0]));
812
1.19k
        }
813
814
1.32k
        block.get_by_position(result).column =
815
1.32k
                ColumnNullable::create(std::move(res), std::move(null_map));
816
1.32k
        return Status::OK();
817
1.32k
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
764
815
                        uint32_t result, size_t input_rows_count) const override {
765
815
        DCHECK_GE(arguments.size(), 3);
766
815
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
767
815
        auto res = ColumnString::create();
768
769
815
        ColumnPtr col[3];
770
815
        bool col_const[3];
771
3.26k
        for (size_t i = 0; i < 3; ++i) {
772
2.44k
            std::tie(col[i], col_const[i]) =
773
2.44k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
774
2.44k
        }
775
815
        auto& null_map_data = null_map->get_data();
776
815
        auto& res_offsets = res->get_offsets();
777
815
        auto& res_chars = res->get_chars();
778
815
        res_offsets.resize(input_rows_count);
779
780
815
        const auto* strcol = assert_cast<const ColumnString*>(col[0].get());
781
815
        const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get());
782
815
        const auto& col_len_data = col_len->get_data();
783
784
815
        const auto* padcol = assert_cast<const ColumnString*>(col[2].get());
785
786
815
        if (col_const[1] && col_const[2]) {
787
66
            auto pad = padcol->get_data_at(0);
788
66
            const bool pad_all_ascii =
789
66
                    simd::VStringFunctions::is_ascii({pad.data, static_cast<size_t>(pad.size)});
790
66
            const bool all_ascii = pad_all_ascii && strcol->is_ascii();
791
66
            std::visit(
792
66
                    [&](auto str_const) {
793
66
                        if (all_ascii) {
794
66
                            execute_const_len_const_pad<true, str_const>(
795
66
                                    *strcol, col_len_data, *padcol, res_offsets, res_chars,
796
66
                                    null_map_data, input_rows_count);
797
66
                        } else {
798
66
                            execute_const_len_const_pad<false, str_const>(
799
66
                                    *strcol, col_len_data, *padcol, res_offsets, res_chars,
800
66
                                    null_map_data, input_rows_count);
801
66
                        }
802
66
                    },
803
66
                    make_bool_variant(col_const[0]));
804
749
        } else {
805
749
            std::visit(
806
749
                    [&](auto str_const) {
807
749
                        execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol,
808
749
                                                   col_const[2], res_offsets, res_chars,
809
749
                                                   null_map_data, input_rows_count);
810
749
                    },
811
749
                    make_bool_variant(col_const[0]));
812
749
        }
813
814
815
        block.get_by_position(result).column =
815
815
                ColumnNullable::create(std::move(res), std::move(null_map));
816
815
        return Status::OK();
817
815
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
764
510
                        uint32_t result, size_t input_rows_count) const override {
765
510
        DCHECK_GE(arguments.size(), 3);
766
510
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
767
510
        auto res = ColumnString::create();
768
769
510
        ColumnPtr col[3];
770
510
        bool col_const[3];
771
2.04k
        for (size_t i = 0; i < 3; ++i) {
772
1.53k
            std::tie(col[i], col_const[i]) =
773
1.53k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
774
1.53k
        }
775
510
        auto& null_map_data = null_map->get_data();
776
510
        auto& res_offsets = res->get_offsets();
777
510
        auto& res_chars = res->get_chars();
778
510
        res_offsets.resize(input_rows_count);
779
780
510
        const auto* strcol = assert_cast<const ColumnString*>(col[0].get());
781
510
        const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get());
782
510
        const auto& col_len_data = col_len->get_data();
783
784
510
        const auto* padcol = assert_cast<const ColumnString*>(col[2].get());
785
786
510
        if (col_const[1] && col_const[2]) {
787
67
            auto pad = padcol->get_data_at(0);
788
67
            const bool pad_all_ascii =
789
67
                    simd::VStringFunctions::is_ascii({pad.data, static_cast<size_t>(pad.size)});
790
67
            const bool all_ascii = pad_all_ascii && strcol->is_ascii();
791
67
            std::visit(
792
67
                    [&](auto str_const) {
793
67
                        if (all_ascii) {
794
67
                            execute_const_len_const_pad<true, str_const>(
795
67
                                    *strcol, col_len_data, *padcol, res_offsets, res_chars,
796
67
                                    null_map_data, input_rows_count);
797
67
                        } else {
798
67
                            execute_const_len_const_pad<false, str_const>(
799
67
                                    *strcol, col_len_data, *padcol, res_offsets, res_chars,
800
67
                                    null_map_data, input_rows_count);
801
67
                        }
802
67
                    },
803
67
                    make_bool_variant(col_const[0]));
804
443
        } else {
805
443
            std::visit(
806
443
                    [&](auto str_const) {
807
443
                        execute_general<str_const>(*strcol, col_len_data, col_const[1], *padcol,
808
443
                                                   col_const[2], res_offsets, res_chars,
809
443
                                                   null_map_data, input_rows_count);
810
443
                    },
811
443
                    make_bool_variant(col_const[0]));
812
443
        }
813
814
510
        block.get_by_position(result).column =
815
510
                ColumnNullable::create(std::move(res), std::move(null_map));
816
510
        return Status::OK();
817
510
    }
818
819
private:
820
    template <bool is_utf8>
821
3.05k
    static size_t get_char_length(const uint8_t* str_data, size_t str_byte_len) {
822
3.05k
        if constexpr (is_utf8) {
823
914
            return simd::VStringFunctions::get_char_len(reinterpret_cast<const char*>(str_data),
824
914
                                                        str_byte_len);
825
914
        }
826
0
        return str_byte_len;
827
3.05k
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE15get_char_lengthILb0EEEmPKhm
Line
Count
Source
821
50
    static size_t get_char_length(const uint8_t* str_data, size_t str_byte_len) {
822
        if constexpr (is_utf8) {
823
            return simd::VStringFunctions::get_char_len(reinterpret_cast<const char*>(str_data),
824
                                                        str_byte_len);
825
        }
826
50
        return str_byte_len;
827
50
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE15get_char_lengthILb1EEEmPKhm
Line
Count
Source
821
610
    static size_t get_char_length(const uint8_t* str_data, size_t str_byte_len) {
822
610
        if constexpr (is_utf8) {
823
610
            return simd::VStringFunctions::get_char_len(reinterpret_cast<const char*>(str_data),
824
610
                                                        str_byte_len);
825
610
        }
826
0
        return str_byte_len;
827
610
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE15get_char_lengthILb0EEEmPKhm
Line
Count
Source
821
2.09k
    static size_t get_char_length(const uint8_t* str_data, size_t str_byte_len) {
822
        if constexpr (is_utf8) {
823
            return simd::VStringFunctions::get_char_len(reinterpret_cast<const char*>(str_data),
824
                                                        str_byte_len);
825
        }
826
2.09k
        return str_byte_len;
827
2.09k
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE15get_char_lengthILb1EEEmPKhm
Line
Count
Source
821
304
    static size_t get_char_length(const uint8_t* str_data, size_t str_byte_len) {
822
304
        if constexpr (is_utf8) {
823
304
            return simd::VStringFunctions::get_char_len(reinterpret_cast<const char*>(str_data),
824
304
                                                        str_byte_len);
825
304
        }
826
0
        return str_byte_len;
827
304
    }
828
829
    template <bool is_utf8>
830
    static size_t get_truncated_byte_length(const uint8_t* str_data, size_t str_byte_len,
831
571
                                            size_t str_char_len, size_t target_len) {
832
571
        if constexpr (!is_utf8) {
833
76
            return target_len;
834
76
        }
835
571
        if (str_char_len == target_len) {
836
84
            return str_byte_len;
837
84
        }
838
487
        auto [byte_len, _] = simd::VStringFunctions::iterate_utf8_with_limit_length(
839
487
                reinterpret_cast<const char*>(str_data),
840
487
                reinterpret_cast<const char*>(str_data) + str_byte_len, target_len);
841
487
        return byte_len;
842
571
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE25get_truncated_byte_lengthILb0EEEmPKhmmm
Line
Count
Source
831
38
                                            size_t str_char_len, size_t target_len) {
832
38
        if constexpr (!is_utf8) {
833
38
            return target_len;
834
38
        }
835
38
        if (str_char_len == target_len) {
836
0
            return str_byte_len;
837
0
        }
838
38
        auto [byte_len, _] = simd::VStringFunctions::iterate_utf8_with_limit_length(
839
38
                reinterpret_cast<const char*>(str_data),
840
38
                reinterpret_cast<const char*>(str_data) + str_byte_len, target_len);
841
38
        return byte_len;
842
38
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE25get_truncated_byte_lengthILb1EEEmPKhmmm
Line
Count
Source
831
252
                                            size_t str_char_len, size_t target_len) {
832
        if constexpr (!is_utf8) {
833
            return target_len;
834
        }
835
252
        if (str_char_len == target_len) {
836
43
            return str_byte_len;
837
43
        }
838
209
        auto [byte_len, _] = simd::VStringFunctions::iterate_utf8_with_limit_length(
839
209
                reinterpret_cast<const char*>(str_data),
840
209
                reinterpret_cast<const char*>(str_data) + str_byte_len, target_len);
841
209
        return byte_len;
842
252
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE25get_truncated_byte_lengthILb0EEEmPKhmmm
Line
Count
Source
831
38
                                            size_t str_char_len, size_t target_len) {
832
38
        if constexpr (!is_utf8) {
833
38
            return target_len;
834
38
        }
835
38
        if (str_char_len == target_len) {
836
0
            return str_byte_len;
837
0
        }
838
38
        auto [byte_len, _] = simd::VStringFunctions::iterate_utf8_with_limit_length(
839
38
                reinterpret_cast<const char*>(str_data),
840
38
                reinterpret_cast<const char*>(str_data) + str_byte_len, target_len);
841
38
        return byte_len;
842
38
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE25get_truncated_byte_lengthILb1EEEmPKhmmm
Line
Count
Source
831
243
                                            size_t str_char_len, size_t target_len) {
832
        if constexpr (!is_utf8) {
833
            return target_len;
834
        }
835
243
        if (str_char_len == target_len) {
836
41
            return str_byte_len;
837
41
        }
838
202
        auto [byte_len, _] = simd::VStringFunctions::iterate_utf8_with_limit_length(
839
202
                reinterpret_cast<const char*>(str_data),
840
202
                reinterpret_cast<const char*>(str_data) + str_byte_len, target_len);
841
202
        return byte_len;
842
243
    }
843
844
3.04k
    static void ensure_capacity(ColumnString::Chars& res_chars, size_t needed, size_t row) {
845
3.04k
        if (needed <= res_chars.size()) {
846
2.65k
            return;
847
2.65k
        }
848
392
        ColumnString::check_chars_length(needed, row);
849
392
        res_chars.resize(std::max(needed, res_chars.size() * 3 / 2));
850
392
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE15ensure_capacityERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEmm
Line
Count
Source
844
652
    static void ensure_capacity(ColumnString::Chars& res_chars, size_t needed, size_t row) {
845
652
        if (needed <= res_chars.size()) {
846
307
            return;
847
307
        }
848
345
        ColumnString::check_chars_length(needed, row);
849
345
        res_chars.resize(std::max(needed, res_chars.size() * 3 / 2));
850
345
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE15ensure_capacityERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEmm
Line
Count
Source
844
2.39k
    static void ensure_capacity(ColumnString::Chars& res_chars, size_t needed, size_t row) {
845
2.39k
        if (needed <= res_chars.size()) {
846
2.34k
            return;
847
2.34k
        }
848
47
        ColumnString::check_chars_length(needed, row);
849
47
        res_chars.resize(std::max(needed, res_chars.size() * 3 / 2));
850
47
    }
851
852
    template <bool is_utf8>
853
    static size_t estimate_const_output_bytes(const ColumnString::Chars& strcol_chars,
854
                                              int target_len, size_t input_rows_count,
855
133
                                              const PaddingChars<is_utf8>* padding) {
856
133
        if (target_len <= 0) {
857
96
            return 0;
858
96
        }
859
37
        if constexpr (!is_utf8) {
860
24
            return static_cast<size_t>(target_len) * input_rows_count;
861
24
        }
862
37
        if (padding != nullptr && padding->num_chars() > 0) {
863
11
            size_t pad_bytes_per_char =
864
11
                    (padding->pad_string.size() + padding->num_chars() - 1) / padding->num_chars();
865
11
            return strcol_chars.size() +
866
11
                   static_cast<size_t>(target_len) * pad_bytes_per_char * input_rows_count;
867
11
        }
868
26
        return strcol_chars.size();
869
37
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE27estimate_const_output_bytesILb0EEEmRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEimPKNS_12PaddingCharsIXT_EEE
Line
Count
Source
855
39
                                              const PaddingChars<is_utf8>* padding) {
856
39
        if (target_len <= 0) {
857
27
            return 0;
858
27
        }
859
12
        if constexpr (!is_utf8) {
860
12
            return static_cast<size_t>(target_len) * input_rows_count;
861
12
        }
862
12
        if (padding != nullptr && padding->num_chars() > 0) {
863
0
            size_t pad_bytes_per_char =
864
0
                    (padding->pad_string.size() + padding->num_chars() - 1) / padding->num_chars();
865
0
            return strcol_chars.size() +
866
0
                   static_cast<size_t>(target_len) * pad_bytes_per_char * input_rows_count;
867
0
        }
868
12
        return strcol_chars.size();
869
12
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE27estimate_const_output_bytesILb1EEEmRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEimPKNS_12PaddingCharsIXT_EEE
Line
Count
Source
855
27
                                              const PaddingChars<is_utf8>* padding) {
856
27
        if (target_len <= 0) {
857
21
            return 0;
858
21
        }
859
        if constexpr (!is_utf8) {
860
            return static_cast<size_t>(target_len) * input_rows_count;
861
        }
862
6
        if (padding != nullptr && padding->num_chars() > 0) {
863
5
            size_t pad_bytes_per_char =
864
5
                    (padding->pad_string.size() + padding->num_chars() - 1) / padding->num_chars();
865
5
            return strcol_chars.size() +
866
5
                   static_cast<size_t>(target_len) * pad_bytes_per_char * input_rows_count;
867
5
        }
868
1
        return strcol_chars.size();
869
6
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE27estimate_const_output_bytesILb0EEEmRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEimPKNS_12PaddingCharsIXT_EEE
Line
Count
Source
855
39
                                              const PaddingChars<is_utf8>* padding) {
856
39
        if (target_len <= 0) {
857
27
            return 0;
858
27
        }
859
12
        if constexpr (!is_utf8) {
860
12
            return static_cast<size_t>(target_len) * input_rows_count;
861
12
        }
862
12
        if (padding != nullptr && padding->num_chars() > 0) {
863
0
            size_t pad_bytes_per_char =
864
0
                    (padding->pad_string.size() + padding->num_chars() - 1) / padding->num_chars();
865
0
            return strcol_chars.size() +
866
0
                   static_cast<size_t>(target_len) * pad_bytes_per_char * input_rows_count;
867
0
        }
868
12
        return strcol_chars.size();
869
12
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE27estimate_const_output_bytesILb1EEEmRKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEimPKNS_12PaddingCharsIXT_EEE
Line
Count
Source
855
28
                                              const PaddingChars<is_utf8>* padding) {
856
28
        if (target_len <= 0) {
857
21
            return 0;
858
21
        }
859
        if constexpr (!is_utf8) {
860
            return static_cast<size_t>(target_len) * input_rows_count;
861
        }
862
7
        if (padding != nullptr && padding->num_chars() > 0) {
863
6
            size_t pad_bytes_per_char =
864
6
                    (padding->pad_string.size() + padding->num_chars() - 1) / padding->num_chars();
865
6
            return strcol_chars.size() +
866
6
                   static_cast<size_t>(target_len) * pad_bytes_per_char * input_rows_count;
867
6
        }
868
1
        return strcol_chars.size();
869
7
    }
870
871
    template <bool is_utf8>
872
    static void append_result_row(const uint8_t* str_data, size_t str_byte_len, int target_len,
873
                                  const PaddingChars<is_utf8>* padding,
874
                                  ColumnString::Chars& res_chars,
875
                                  ColumnString::Offsets& res_offsets,
876
                                  ColumnUInt8::Container& null_map_data, size_t row,
877
3.62k
                                  size_t& dst_offset) {
878
3.62k
        if (target_len < 0) {
879
562
            null_map_data[row] = true;
880
562
            res_offsets[row] = dst_offset;
881
562
            return;
882
562
        }
883
884
3.05k
        const size_t str_char_len = get_char_length<is_utf8>(str_data, str_byte_len);
885
3.05k
        const size_t target_char_len = static_cast<size_t>(target_len);
886
3.05k
        if (str_char_len >= target_char_len) {
887
571
            const size_t truncated_byte_len = get_truncated_byte_length<is_utf8>(
888
571
                    str_data, str_byte_len, str_char_len, target_char_len);
889
571
            const size_t needed = dst_offset + truncated_byte_len;
890
571
            ensure_capacity(res_chars, needed, row);
891
571
            memcpy(res_chars.data() + dst_offset, str_data, truncated_byte_len);
892
571
            dst_offset += truncated_byte_len;
893
571
            res_offsets[row] = dst_offset;
894
571
            return;
895
571
        }
896
897
2.48k
        if (padding == nullptr || padding->num_chars() == 0) {
898
16
            res_offsets[row] = dst_offset;
899
16
            return;
900
16
        }
901
902
2.47k
        const size_t pad_char_count = target_char_len - str_char_len;
903
2.47k
        const size_t full_cycles = pad_char_count / padding->num_chars();
904
2.47k
        const size_t remainder_chars = pad_char_count % padding->num_chars();
905
2.47k
        const size_t pad_bytes =
906
2.47k
                full_cycles * padding->pad_string.size() + padding->chars_to_bytes(remainder_chars);
907
2.47k
        const size_t needed = dst_offset + str_byte_len + pad_bytes;
908
2.47k
        ensure_capacity(res_chars, needed, row);
909
910
2.47k
        if constexpr (Impl::is_lpad) {
911
362
            dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count);
912
362
            memcpy(res_chars.data() + dst_offset, str_data, str_byte_len);
913
362
            dst_offset += str_byte_len;
914
2.11k
        } else {
915
2.11k
            memcpy(res_chars.data() + dst_offset, str_data, str_byte_len);
916
2.11k
            dst_offset += str_byte_len;
917
2.11k
            dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count);
918
2.11k
        }
919
2.47k
        res_offsets[row] = dst_offset;
920
2.47k
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE17append_result_rowILb0EEEvPKhmiPKNS_12PaddingCharsIXT_EEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_mRm
Line
Count
Source
877
68
                                  size_t& dst_offset) {
878
68
        if (target_len < 0) {
879
18
            null_map_data[row] = true;
880
18
            res_offsets[row] = dst_offset;
881
18
            return;
882
18
        }
883
884
50
        const size_t str_char_len = get_char_length<is_utf8>(str_data, str_byte_len);
885
50
        const size_t target_char_len = static_cast<size_t>(target_len);
886
50
        if (str_char_len >= target_char_len) {
887
38
            const size_t truncated_byte_len = get_truncated_byte_length<is_utf8>(
888
38
                    str_data, str_byte_len, str_char_len, target_char_len);
889
38
            const size_t needed = dst_offset + truncated_byte_len;
890
38
            ensure_capacity(res_chars, needed, row);
891
38
            memcpy(res_chars.data() + dst_offset, str_data, truncated_byte_len);
892
38
            dst_offset += truncated_byte_len;
893
38
            res_offsets[row] = dst_offset;
894
38
            return;
895
38
        }
896
897
12
        if (padding == nullptr || padding->num_chars() == 0) {
898
0
            res_offsets[row] = dst_offset;
899
0
            return;
900
0
        }
901
902
12
        const size_t pad_char_count = target_char_len - str_char_len;
903
12
        const size_t full_cycles = pad_char_count / padding->num_chars();
904
12
        const size_t remainder_chars = pad_char_count % padding->num_chars();
905
12
        const size_t pad_bytes =
906
12
                full_cycles * padding->pad_string.size() + padding->chars_to_bytes(remainder_chars);
907
12
        const size_t needed = dst_offset + str_byte_len + pad_bytes;
908
12
        ensure_capacity(res_chars, needed, row);
909
910
12
        if constexpr (Impl::is_lpad) {
911
12
            dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count);
912
12
            memcpy(res_chars.data() + dst_offset, str_data, str_byte_len);
913
12
            dst_offset += str_byte_len;
914
        } else {
915
            memcpy(res_chars.data() + dst_offset, str_data, str_byte_len);
916
            dst_offset += str_byte_len;
917
            dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count);
918
        }
919
12
        res_offsets[row] = dst_offset;
920
12
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE17append_result_rowILb1EEEvPKhmiPKNS_12PaddingCharsIXT_EEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_mRm
Line
Count
Source
877
873
                                  size_t& dst_offset) {
878
873
        if (target_len < 0) {
879
263
            null_map_data[row] = true;
880
263
            res_offsets[row] = dst_offset;
881
263
            return;
882
263
        }
883
884
610
        const size_t str_char_len = get_char_length<is_utf8>(str_data, str_byte_len);
885
610
        const size_t target_char_len = static_cast<size_t>(target_len);
886
610
        if (str_char_len >= target_char_len) {
887
252
            const size_t truncated_byte_len = get_truncated_byte_length<is_utf8>(
888
252
                    str_data, str_byte_len, str_char_len, target_char_len);
889
252
            const size_t needed = dst_offset + truncated_byte_len;
890
252
            ensure_capacity(res_chars, needed, row);
891
252
            memcpy(res_chars.data() + dst_offset, str_data, truncated_byte_len);
892
252
            dst_offset += truncated_byte_len;
893
252
            res_offsets[row] = dst_offset;
894
252
            return;
895
252
        }
896
897
358
        if (padding == nullptr || padding->num_chars() == 0) {
898
8
            res_offsets[row] = dst_offset;
899
8
            return;
900
8
        }
901
902
350
        const size_t pad_char_count = target_char_len - str_char_len;
903
350
        const size_t full_cycles = pad_char_count / padding->num_chars();
904
350
        const size_t remainder_chars = pad_char_count % padding->num_chars();
905
350
        const size_t pad_bytes =
906
350
                full_cycles * padding->pad_string.size() + padding->chars_to_bytes(remainder_chars);
907
350
        const size_t needed = dst_offset + str_byte_len + pad_bytes;
908
350
        ensure_capacity(res_chars, needed, row);
909
910
350
        if constexpr (Impl::is_lpad) {
911
350
            dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count);
912
350
            memcpy(res_chars.data() + dst_offset, str_data, str_byte_len);
913
350
            dst_offset += str_byte_len;
914
        } else {
915
            memcpy(res_chars.data() + dst_offset, str_data, str_byte_len);
916
            dst_offset += str_byte_len;
917
            dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count);
918
        }
919
350
        res_offsets[row] = dst_offset;
920
350
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE17append_result_rowILb0EEEvPKhmiPKNS_12PaddingCharsIXT_EEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_mRm
Line
Count
Source
877
2.11k
                                  size_t& dst_offset) {
878
2.11k
        if (target_len < 0) {
879
18
            null_map_data[row] = true;
880
18
            res_offsets[row] = dst_offset;
881
18
            return;
882
18
        }
883
884
2.09k
        const size_t str_char_len = get_char_length<is_utf8>(str_data, str_byte_len);
885
2.09k
        const size_t target_char_len = static_cast<size_t>(target_len);
886
2.09k
        if (str_char_len >= target_char_len) {
887
38
            const size_t truncated_byte_len = get_truncated_byte_length<is_utf8>(
888
38
                    str_data, str_byte_len, str_char_len, target_char_len);
889
38
            const size_t needed = dst_offset + truncated_byte_len;
890
38
            ensure_capacity(res_chars, needed, row);
891
38
            memcpy(res_chars.data() + dst_offset, str_data, truncated_byte_len);
892
38
            dst_offset += truncated_byte_len;
893
38
            res_offsets[row] = dst_offset;
894
38
            return;
895
38
        }
896
897
2.05k
        if (padding == nullptr || padding->num_chars() == 0) {
898
0
            res_offsets[row] = dst_offset;
899
0
            return;
900
0
        }
901
902
2.05k
        const size_t pad_char_count = target_char_len - str_char_len;
903
2.05k
        const size_t full_cycles = pad_char_count / padding->num_chars();
904
2.05k
        const size_t remainder_chars = pad_char_count % padding->num_chars();
905
2.05k
        const size_t pad_bytes =
906
2.05k
                full_cycles * padding->pad_string.size() + padding->chars_to_bytes(remainder_chars);
907
2.05k
        const size_t needed = dst_offset + str_byte_len + pad_bytes;
908
2.05k
        ensure_capacity(res_chars, needed, row);
909
910
        if constexpr (Impl::is_lpad) {
911
            dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count);
912
            memcpy(res_chars.data() + dst_offset, str_data, str_byte_len);
913
            dst_offset += str_byte_len;
914
2.05k
        } else {
915
2.05k
            memcpy(res_chars.data() + dst_offset, str_data, str_byte_len);
916
2.05k
            dst_offset += str_byte_len;
917
2.05k
            dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count);
918
2.05k
        }
919
2.05k
        res_offsets[row] = dst_offset;
920
2.05k
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE17append_result_rowILb1EEEvPKhmiPKNS_12PaddingCharsIXT_EEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSA_IjLm4096ESD_Lm16ELm15EEESF_mRm
Line
Count
Source
877
567
                                  size_t& dst_offset) {
878
567
        if (target_len < 0) {
879
263
            null_map_data[row] = true;
880
263
            res_offsets[row] = dst_offset;
881
263
            return;
882
263
        }
883
884
304
        const size_t str_char_len = get_char_length<is_utf8>(str_data, str_byte_len);
885
304
        const size_t target_char_len = static_cast<size_t>(target_len);
886
304
        if (str_char_len >= target_char_len) {
887
243
            const size_t truncated_byte_len = get_truncated_byte_length<is_utf8>(
888
243
                    str_data, str_byte_len, str_char_len, target_char_len);
889
243
            const size_t needed = dst_offset + truncated_byte_len;
890
243
            ensure_capacity(res_chars, needed, row);
891
243
            memcpy(res_chars.data() + dst_offset, str_data, truncated_byte_len);
892
243
            dst_offset += truncated_byte_len;
893
243
            res_offsets[row] = dst_offset;
894
243
            return;
895
243
        }
896
897
61
        if (padding == nullptr || padding->num_chars() == 0) {
898
8
            res_offsets[row] = dst_offset;
899
8
            return;
900
8
        }
901
902
53
        const size_t pad_char_count = target_char_len - str_char_len;
903
53
        const size_t full_cycles = pad_char_count / padding->num_chars();
904
53
        const size_t remainder_chars = pad_char_count % padding->num_chars();
905
53
        const size_t pad_bytes =
906
53
                full_cycles * padding->pad_string.size() + padding->chars_to_bytes(remainder_chars);
907
53
        const size_t needed = dst_offset + str_byte_len + pad_bytes;
908
53
        ensure_capacity(res_chars, needed, row);
909
910
        if constexpr (Impl::is_lpad) {
911
            dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count);
912
            memcpy(res_chars.data() + dst_offset, str_data, str_byte_len);
913
            dst_offset += str_byte_len;
914
53
        } else {
915
53
            memcpy(res_chars.data() + dst_offset, str_data, str_byte_len);
916
53
            dst_offset += str_byte_len;
917
53
            dst_offset += padding->append_to(res_chars.data() + dst_offset, pad_char_count);
918
53
        }
919
53
        res_offsets[row] = dst_offset;
920
53
    }
921
922
    template <bool all_ascii, bool str_const>
923
    static void execute_const_len_const_pad(const ColumnString& strcol,
924
                                            const ColumnInt32::Container& col_len_data,
925
                                            const ColumnString& padcol,
926
                                            ColumnString::Offsets& res_offsets,
927
                                            ColumnString::Chars& res_chars,
928
                                            ColumnUInt8::Container& null_map_data,
929
133
                                            size_t input_rows_count) {
930
133
        constexpr bool is_utf8 = !all_ascii;
931
133
        using PadChars = PaddingChars<is_utf8>;
932
933
133
        const int target_len = col_len_data[0];
934
133
        std::optional<PadChars> padding;
935
133
        const auto pad = padcol.get_data_at(0);
936
133
        if (!pad.empty()) {
937
103
            padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
938
103
        }
939
940
133
        const PadChars* padding_ptr = padding ? &*padding : nullptr;
941
133
        const size_t estimated_total = estimate_const_output_bytes<is_utf8>(
942
133
                strcol.get_chars(), target_len, input_rows_count, padding_ptr);
943
133
        if (estimated_total > 0) {
944
37
            ColumnString::check_chars_length(estimated_total, 0, input_rows_count);
945
37
        }
946
133
        res_chars.resize(estimated_total);
947
948
133
        size_t dst_offset = 0;
949
2.36k
        for (size_t i = 0; i < input_rows_count; ++i) {
950
2.23k
            auto str = strcol.get_data_at(index_check_const<str_const>(i));
951
2.23k
            append_result_row<is_utf8>(reinterpret_cast<const uint8_t*>(str.data), str.size,
952
2.23k
                                       target_len, padding_ptr, res_chars, res_offsets,
953
2.23k
                                       null_map_data, i, dst_offset);
954
2.23k
        }
955
133
        res_chars.resize(dst_offset);
956
133
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE27execute_const_len_const_padILb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
Line
Count
Source
929
39
                                            size_t input_rows_count) {
930
39
        constexpr bool is_utf8 = !all_ascii;
931
39
        using PadChars = PaddingChars<is_utf8>;
932
933
39
        const int target_len = col_len_data[0];
934
39
        std::optional<PadChars> padding;
935
39
        const auto pad = padcol.get_data_at(0);
936
39
        if (!pad.empty()) {
937
28
            padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
938
28
        }
939
940
39
        const PadChars* padding_ptr = padding ? &*padding : nullptr;
941
39
        const size_t estimated_total = estimate_const_output_bytes<is_utf8>(
942
39
                strcol.get_chars(), target_len, input_rows_count, padding_ptr);
943
39
        if (estimated_total > 0) {
944
12
            ColumnString::check_chars_length(estimated_total, 0, input_rows_count);
945
12
        }
946
39
        res_chars.resize(estimated_total);
947
948
39
        size_t dst_offset = 0;
949
107
        for (size_t i = 0; i < input_rows_count; ++i) {
950
68
            auto str = strcol.get_data_at(index_check_const<str_const>(i));
951
68
            append_result_row<is_utf8>(reinterpret_cast<const uint8_t*>(str.data), str.size,
952
68
                                       target_len, padding_ptr, res_chars, res_offsets,
953
68
                                       null_map_data, i, dst_offset);
954
68
        }
955
39
        res_chars.resize(dst_offset);
956
39
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE27execute_const_len_const_padILb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
Line
Count
Source
929
27
                                            size_t input_rows_count) {
930
27
        constexpr bool is_utf8 = !all_ascii;
931
27
        using PadChars = PaddingChars<is_utf8>;
932
933
27
        const int target_len = col_len_data[0];
934
27
        std::optional<PadChars> padding;
935
27
        const auto pad = padcol.get_data_at(0);
936
27
        if (!pad.empty()) {
937
23
            padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
938
23
        }
939
940
27
        const PadChars* padding_ptr = padding ? &*padding : nullptr;
941
27
        const size_t estimated_total = estimate_const_output_bytes<is_utf8>(
942
27
                strcol.get_chars(), target_len, input_rows_count, padding_ptr);
943
27
        if (estimated_total > 0) {
944
6
            ColumnString::check_chars_length(estimated_total, 0, input_rows_count);
945
6
        }
946
27
        res_chars.resize(estimated_total);
947
948
27
        size_t dst_offset = 0;
949
54
        for (size_t i = 0; i < input_rows_count; ++i) {
950
27
            auto str = strcol.get_data_at(index_check_const<str_const>(i));
951
27
            append_result_row<is_utf8>(reinterpret_cast<const uint8_t*>(str.data), str.size,
952
27
                                       target_len, padding_ptr, res_chars, res_offsets,
953
27
                                       null_map_data, i, dst_offset);
954
27
        }
955
27
        res_chars.resize(dst_offset);
956
27
    }
Unexecuted instantiation: _ZN5doris17FunctionStringPadINS_10StringLPadEE27execute_const_len_const_padILb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
Unexecuted instantiation: _ZN5doris17FunctionStringPadINS_10StringLPadEE27execute_const_len_const_padILb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
_ZN5doris17FunctionStringPadINS_10StringRPadEE27execute_const_len_const_padILb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
Line
Count
Source
929
39
                                            size_t input_rows_count) {
930
39
        constexpr bool is_utf8 = !all_ascii;
931
39
        using PadChars = PaddingChars<is_utf8>;
932
933
39
        const int target_len = col_len_data[0];
934
39
        std::optional<PadChars> padding;
935
39
        const auto pad = padcol.get_data_at(0);
936
39
        if (!pad.empty()) {
937
28
            padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
938
28
        }
939
940
39
        const PadChars* padding_ptr = padding ? &*padding : nullptr;
941
39
        const size_t estimated_total = estimate_const_output_bytes<is_utf8>(
942
39
                strcol.get_chars(), target_len, input_rows_count, padding_ptr);
943
39
        if (estimated_total > 0) {
944
12
            ColumnString::check_chars_length(estimated_total, 0, input_rows_count);
945
12
        }
946
39
        res_chars.resize(estimated_total);
947
948
39
        size_t dst_offset = 0;
949
2.15k
        for (size_t i = 0; i < input_rows_count; ++i) {
950
2.11k
            auto str = strcol.get_data_at(index_check_const<str_const>(i));
951
2.11k
            append_result_row<is_utf8>(reinterpret_cast<const uint8_t*>(str.data), str.size,
952
2.11k
                                       target_len, padding_ptr, res_chars, res_offsets,
953
2.11k
                                       null_map_data, i, dst_offset);
954
2.11k
        }
955
39
        res_chars.resize(dst_offset);
956
39
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE27execute_const_len_const_padILb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
Line
Count
Source
929
28
                                            size_t input_rows_count) {
930
28
        constexpr bool is_utf8 = !all_ascii;
931
28
        using PadChars = PaddingChars<is_utf8>;
932
933
28
        const int target_len = col_len_data[0];
934
28
        std::optional<PadChars> padding;
935
28
        const auto pad = padcol.get_data_at(0);
936
28
        if (!pad.empty()) {
937
24
            padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
938
24
        }
939
940
28
        const PadChars* padding_ptr = padding ? &*padding : nullptr;
941
28
        const size_t estimated_total = estimate_const_output_bytes<is_utf8>(
942
28
                strcol.get_chars(), target_len, input_rows_count, padding_ptr);
943
28
        if (estimated_total > 0) {
944
7
            ColumnString::check_chars_length(estimated_total, 0, input_rows_count);
945
7
        }
946
28
        res_chars.resize(estimated_total);
947
948
28
        size_t dst_offset = 0;
949
56
        for (size_t i = 0; i < input_rows_count; ++i) {
950
28
            auto str = strcol.get_data_at(index_check_const<str_const>(i));
951
28
            append_result_row<is_utf8>(reinterpret_cast<const uint8_t*>(str.data), str.size,
952
28
                                       target_len, padding_ptr, res_chars, res_offsets,
953
28
                                       null_map_data, i, dst_offset);
954
28
        }
955
28
        res_chars.resize(dst_offset);
956
28
    }
Unexecuted instantiation: _ZN5doris17FunctionStringPadINS_10StringRPadEE27execute_const_len_const_padILb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
Unexecuted instantiation: _ZN5doris17FunctionStringPadINS_10StringRPadEE27execute_const_len_const_padILb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEES7_RNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
957
958
    template <bool str_const>
959
    static void execute_general(const ColumnString& strcol,
960
                                const ColumnInt32::Container& col_len_data, bool len_const,
961
                                const ColumnString& padcol, bool pad_const,
962
                                ColumnString::Offsets& res_offsets, ColumnString::Chars& res_chars,
963
1.19k
                                ColumnUInt8::Container& null_map_data, size_t input_rows_count) {
964
1.19k
        using PadChars = PaddingChars<true>;
965
1.19k
        std::optional<PadChars> const_padding;
966
1.19k
        const PadChars* const_padding_ptr = nullptr;
967
1.19k
        if (pad_const) {
968
248
            auto pad = padcol.get_data_at(0);
969
248
            if (!pad.empty()) {
970
188
                const_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
971
188
                const_padding_ptr = &*const_padding;
972
188
            }
973
248
        }
974
975
1.19k
        res_chars.resize(strcol.get_chars().size());
976
1.19k
        size_t dst_offset = 0;
977
2.57k
        for (size_t i = 0; i < input_rows_count; ++i) {
978
1.38k
            auto str = strcol.get_data_at(index_check_const<str_const>(i));
979
1.38k
            const int target_len = col_len_data[len_const ? 0 : i];
980
981
1.38k
            const PadChars* padding_ptr = const_padding_ptr;
982
1.38k
            std::optional<PadChars> row_padding;
983
1.38k
            if (!pad_const) {
984
1.13k
                auto pad = padcol.get_data_at(i);
985
1.13k
                if (!pad.empty()) {
986
911
                    row_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
987
911
                    padding_ptr = &*row_padding;
988
911
                } else {
989
226
                    padding_ptr = nullptr;
990
226
                }
991
1.13k
            }
992
993
1.38k
            append_result_row<true>(reinterpret_cast<const uint8_t*>(str.data), str.size,
994
1.38k
                                    target_len, padding_ptr, res_chars, res_offsets, null_map_data,
995
1.38k
                                    i, dst_offset);
996
1.38k
        }
997
1.19k
        res_chars.resize(dst_offset);
998
1.19k
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE15execute_generalILb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEbS7_bRNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
Line
Count
Source
963
563
                                ColumnUInt8::Container& null_map_data, size_t input_rows_count) {
964
563
        using PadChars = PaddingChars<true>;
965
563
        std::optional<PadChars> const_padding;
966
563
        const PadChars* const_padding_ptr = nullptr;
967
563
        if (pad_const) {
968
62
            auto pad = padcol.get_data_at(0);
969
62
            if (!pad.empty()) {
970
47
                const_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
971
47
                const_padding_ptr = &*const_padding;
972
47
            }
973
62
        }
974
975
563
        res_chars.resize(strcol.get_chars().size());
976
563
        size_t dst_offset = 0;
977
1.22k
        for (size_t i = 0; i < input_rows_count; ++i) {
978
660
            auto str = strcol.get_data_at(index_check_const<str_const>(i));
979
660
            const int target_len = col_len_data[len_const ? 0 : i];
980
981
660
            const PadChars* padding_ptr = const_padding_ptr;
982
660
            std::optional<PadChars> row_padding;
983
660
            if (!pad_const) {
984
598
                auto pad = padcol.get_data_at(i);
985
598
                if (!pad.empty()) {
986
515
                    row_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
987
515
                    padding_ptr = &*row_padding;
988
515
                } else {
989
83
                    padding_ptr = nullptr;
990
83
                }
991
598
            }
992
993
660
            append_result_row<true>(reinterpret_cast<const uint8_t*>(str.data), str.size,
994
660
                                    target_len, padding_ptr, res_chars, res_offsets, null_map_data,
995
660
                                    i, dst_offset);
996
660
        }
997
563
        res_chars.resize(dst_offset);
998
563
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE15execute_generalILb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEbS7_bRNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
Line
Count
Source
963
186
                                ColumnUInt8::Container& null_map_data, size_t input_rows_count) {
964
186
        using PadChars = PaddingChars<true>;
965
186
        std::optional<PadChars> const_padding;
966
186
        const PadChars* const_padding_ptr = nullptr;
967
186
        if (pad_const) {
968
62
            auto pad = padcol.get_data_at(0);
969
62
            if (!pad.empty()) {
970
47
                const_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
971
47
                const_padding_ptr = &*const_padding;
972
47
            }
973
62
        }
974
975
186
        res_chars.resize(strcol.get_chars().size());
976
186
        size_t dst_offset = 0;
977
372
        for (size_t i = 0; i < input_rows_count; ++i) {
978
186
            auto str = strcol.get_data_at(index_check_const<str_const>(i));
979
186
            const int target_len = col_len_data[len_const ? 0 : i];
980
981
186
            const PadChars* padding_ptr = const_padding_ptr;
982
186
            std::optional<PadChars> row_padding;
983
186
            if (!pad_const) {
984
124
                auto pad = padcol.get_data_at(i);
985
124
                if (!pad.empty()) {
986
94
                    row_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
987
94
                    padding_ptr = &*row_padding;
988
94
                } else {
989
30
                    padding_ptr = nullptr;
990
30
                }
991
124
            }
992
993
186
            append_result_row<true>(reinterpret_cast<const uint8_t*>(str.data), str.size,
994
186
                                    target_len, padding_ptr, res_chars, res_offsets, null_map_data,
995
186
                                    i, dst_offset);
996
186
        }
997
186
        res_chars.resize(dst_offset);
998
186
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE15execute_generalILb0EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEbS7_bRNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
Line
Count
Source
963
257
                                ColumnUInt8::Container& null_map_data, size_t input_rows_count) {
964
257
        using PadChars = PaddingChars<true>;
965
257
        std::optional<PadChars> const_padding;
966
257
        const PadChars* const_padding_ptr = nullptr;
967
257
        if (pad_const) {
968
62
            auto pad = padcol.get_data_at(0);
969
62
            if (!pad.empty()) {
970
47
                const_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
971
47
                const_padding_ptr = &*const_padding;
972
47
            }
973
62
        }
974
975
257
        res_chars.resize(strcol.get_chars().size());
976
257
        size_t dst_offset = 0;
977
610
        for (size_t i = 0; i < input_rows_count; ++i) {
978
353
            auto str = strcol.get_data_at(index_check_const<str_const>(i));
979
353
            const int target_len = col_len_data[len_const ? 0 : i];
980
981
353
            const PadChars* padding_ptr = const_padding_ptr;
982
353
            std::optional<PadChars> row_padding;
983
353
            if (!pad_const) {
984
291
                auto pad = padcol.get_data_at(i);
985
291
                if (!pad.empty()) {
986
208
                    row_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
987
208
                    padding_ptr = &*row_padding;
988
208
                } else {
989
83
                    padding_ptr = nullptr;
990
83
                }
991
291
            }
992
993
353
            append_result_row<true>(reinterpret_cast<const uint8_t*>(str.data), str.size,
994
353
                                    target_len, padding_ptr, res_chars, res_offsets, null_map_data,
995
353
                                    i, dst_offset);
996
353
        }
997
257
        res_chars.resize(dst_offset);
998
257
    }
_ZN5doris17FunctionStringPadINS_10StringRPadEE15execute_generalILb1EEEvRKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEbS7_bRNS8_IjLm4096ESB_Lm16ELm15EEERNS8_IhLm4096ESB_Lm16ELm15EEESI_m
Line
Count
Source
963
186
                                ColumnUInt8::Container& null_map_data, size_t input_rows_count) {
964
186
        using PadChars = PaddingChars<true>;
965
186
        std::optional<PadChars> const_padding;
966
186
        const PadChars* const_padding_ptr = nullptr;
967
186
        if (pad_const) {
968
62
            auto pad = padcol.get_data_at(0);
969
62
            if (!pad.empty()) {
970
47
                const_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
971
47
                const_padding_ptr = &*const_padding;
972
47
            }
973
62
        }
974
975
186
        res_chars.resize(strcol.get_chars().size());
976
186
        size_t dst_offset = 0;
977
372
        for (size_t i = 0; i < input_rows_count; ++i) {
978
186
            auto str = strcol.get_data_at(index_check_const<str_const>(i));
979
186
            const int target_len = col_len_data[len_const ? 0 : i];
980
981
186
            const PadChars* padding_ptr = const_padding_ptr;
982
186
            std::optional<PadChars> row_padding;
983
186
            if (!pad_const) {
984
124
                auto pad = padcol.get_data_at(i);
985
124
                if (!pad.empty()) {
986
94
                    row_padding.emplace(reinterpret_cast<const uint8_t*>(pad.data), pad.size);
987
94
                    padding_ptr = &*row_padding;
988
94
                } else {
989
30
                    padding_ptr = nullptr;
990
30
                }
991
124
            }
992
993
186
            append_result_row<true>(reinterpret_cast<const uint8_t*>(str.data), str.size,
994
186
                                    target_len, padding_ptr, res_chars, res_offsets, null_map_data,
995
186
                                    i, dst_offset);
996
186
        }
997
186
        res_chars.resize(dst_offset);
998
186
    }
999
};
1000
1001
#include "common/compile_check_avoid_end.h"
1002
} // namespace doris