Coverage Report

Created: 2026-03-15 04:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
#include <sys/types.h>
22
23
#include <algorithm>
24
#include <array>
25
#include <boost/iterator/iterator_facade.hpp>
26
#include <boost/locale.hpp>
27
#include <climits>
28
#include <cmath>
29
#include <cstddef>
30
#include <cstdlib>
31
#include <cstring>
32
#include <iomanip>
33
#include <memory>
34
#include <ostream>
35
#include <random>
36
#include <sstream>
37
#include <tuple>
38
#include <type_traits>
39
#include <unordered_map>
40
#include <utility>
41
#include <variant>
42
#include <vector>
43
44
#include "common/compiler_util.h" // IWYU pragma: keep
45
#include "common/exception.h"
46
#include "common/status.h"
47
#include "core/block/block.h"
48
#include "core/block/column_numbers.h"
49
#include "core/block/column_with_type_and_name.h"
50
#include "core/column/column.h"
51
#include "core/column/column_const.h"
52
#include "core/column/column_varbinary.h"
53
#include "core/column/column_vector.h"
54
#include "core/data_type/data_type.h"
55
#include "core/data_type/define_primitive_type.h"
56
#include "core/data_type/primitive_type.h"
57
#include "core/memcmp_small.h"
58
#include "core/memcpy_small.h"
59
#include "core/pod_array.h"
60
#include "core/pod_array_fwd.h"
61
#include "core/types.h"
62
#include "core/value/decimalv2_value.h"
63
#include "exec/common/hash_table/phmap_fwd_decl.h"
64
#include "exec/common/int_exp.h"
65
#include "exec/common/template_helpers.hpp"
66
#include "exprs/aggregate/aggregate_function.h"
67
#include "exprs/function/function_needs_to_handle_null.h"
68
#include "util/raw_value.h"
69
#include "util/sha.h"
70
#include "util/string_search.hpp"
71
#include "util/string_util.h"
72
#include "util/utf8_check.h"
73
74
#ifndef USE_LIBCPP
75
#include <memory_resource>
76
#define PMR std::pmr
77
#else
78
#include <boost/container/pmr/monotonic_buffer_resource.hpp>
79
#include <boost/container/pmr/vector.hpp>
80
#define PMR boost::container::pmr
81
#endif
82
83
#include <fmt/format.h>
84
#include <unicode/normalizer2.h>
85
#include <unicode/stringpiece.h>
86
#include <unicode/unistr.h>
87
88
#include <cstdint>
89
#include <string>
90
#include <string_view>
91
92
#include "core/assert_cast.h"
93
#include "core/column/column_array.h"
94
#include "core/column/column_decimal.h"
95
#include "core/column/column_nullable.h"
96
#include "core/column/column_string.h"
97
#include "core/data_type/data_type_array.h"
98
#include "core/data_type/data_type_decimal.h"
99
#include "core/data_type/data_type_nullable.h"
100
#include "core/data_type/data_type_number.h"
101
#include "core/data_type/data_type_string.h"
102
#include "core/string_ref.h"
103
#include "exec/common/pinyin.h"
104
#include "exec/common/stringop_substring.h"
105
#include "exec/common/util.hpp"
106
#include "exprs/function/function.h"
107
#include "exprs/function/function_helpers.h"
108
#include "exprs/function_context.h"
109
#include "exprs/math_functions.h"
110
#include "pugixml.hpp"
111
#include "util/md5.h"
112
#include "util/simd/vstring_function.h"
113
#include "util/sm3.h"
114
#include "util/url_coding.h"
115
#include "util/url_parser.h"
116
117
namespace doris {
118
#include "common/compile_check_avoid_begin.h"
119
class FunctionStrcmp : public IFunction {
120
public:
121
    static constexpr auto name = "strcmp";
122
123
2
    static FunctionPtr create() { return std::make_shared<FunctionStrcmp>(); }
124
125
1
    String get_name() const override { return name; }
126
127
0
    size_t get_number_of_arguments() const override { return 2; }
128
129
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
130
0
        return std::make_shared<DataTypeInt8>();
131
0
    }
132
133
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
134
0
                        uint32_t result, size_t input_rows_count) const override {
135
0
        const auto& [arg0_column, arg0_const] =
136
0
                unpack_if_const(block.get_by_position(arguments[0]).column);
137
0
        const auto& [arg1_column, arg1_const] =
138
0
                unpack_if_const(block.get_by_position(arguments[1]).column);
139
140
0
        auto result_column = ColumnInt8::create(input_rows_count);
141
142
0
        if (auto arg0 = check_and_get_column<ColumnString>(arg0_column.get())) {
143
0
            if (auto arg1 = check_and_get_column<ColumnString>(arg1_column.get())) {
144
0
                if (arg0_const) {
145
0
                    scalar_vector(arg0->get_data_at(0), *arg1, *result_column);
146
0
                } else if (arg1_const) {
147
0
                    vector_scalar(*arg0, arg1->get_data_at(0), *result_column);
148
0
                } else {
149
0
                    vector_vector(*arg0, *arg1, *result_column);
150
0
                }
151
0
            }
152
0
        }
153
154
0
        block.replace_by_position(result, std::move(result_column));
155
0
        return Status::OK();
156
0
    }
157
158
private:
159
0
    static void scalar_vector(const StringRef str, const ColumnString& vec1, ColumnInt8& res) {
160
0
        size_t size = vec1.size();
161
0
        for (size_t i = 0; i < size; ++i) {
162
0
            res.get_data()[i] = str.compare(vec1.get_data_at(i));
163
0
        }
164
0
    }
165
166
0
    static void vector_scalar(const ColumnString& vec0, const StringRef str, ColumnInt8& res) {
167
0
        size_t size = vec0.size();
168
0
        for (size_t i = 0; i < size; ++i) {
169
0
            res.get_data()[i] = vec0.get_data_at(i).compare(str);
170
0
        }
171
0
    }
172
173
0
    static void vector_vector(const ColumnString& vec0, const ColumnString& vec1, ColumnInt8& res) {
174
0
        size_t size = vec0.size();
175
0
        for (size_t i = 0; i < size; ++i) {
176
0
            res.get_data()[i] = vec0.get_data_at(i).compare(vec1.get_data_at(i));
177
0
        }
178
0
    }
179
};
180
181
class FunctionAutoPartitionName : public IFunction {
182
public:
183
    static constexpr auto name = "auto_partition_name";
184
2
    static FunctionPtr create() { return std::make_shared<FunctionAutoPartitionName>(); }
185
0
    String get_name() const override { return name; }
186
0
    size_t get_number_of_arguments() const override { return 0; }
187
1
    bool is_variadic() const override { return true; }
188
0
    bool use_default_implementation_for_nulls() const override { return false; }
189
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
190
0
        return std::make_shared<DataTypeString>();
191
0
    }
192
193
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
194
0
                        uint32_t result, size_t input_rows_count) const override {
195
0
        size_t argument_size = arguments.size();
196
0
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
197
0
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
198
0
        std::vector<const ColumnString::Chars*> chars_list(argument_size);
199
0
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
200
0
        std::vector<bool> is_const_args(argument_size);
201
0
        std::vector<const ColumnUInt8::Container*> null_list(argument_size);
202
0
        std::vector<ColumnPtr> argument_null_columns(argument_size);
203
204
0
        std::vector<ColumnPtr> argument_columns(argument_size);
205
0
        for (int i = 0; i < argument_size; ++i) {
206
0
            argument_columns[i] =
207
0
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
208
0
            if (const auto* nullable =
209
0
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
210
0
                null_list[i] = &nullable->get_null_map_data();
211
0
                argument_null_columns[i] = nullable->get_null_map_column_ptr();
212
0
                argument_columns[i] = nullable->get_nested_column_ptr();
213
0
            } else {
214
0
                null_list[i] = &const_null_map->get_data();
215
0
            }
216
217
0
            const auto& [col, is_const] =
218
0
                    unpack_if_const(block.get_by_position(arguments[i]).column);
219
220
0
            const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
221
0
            chars_list[i] = &col_str->get_chars();
222
0
            offsets_list[i] = &col_str->get_offsets();
223
0
            is_const_args[i] = is_const;
224
0
        }
225
226
0
        auto res = ColumnString::create();
227
0
        auto& res_data = res->get_chars();
228
0
        auto& res_offset = res->get_offsets();
229
0
        res_offset.resize(input_rows_count);
230
231
0
        const char* partition_type = chars_list[0]->raw_data();
232
        // partition type is list|range
233
0
        if (std::strncmp(partition_type, "list", 4) == 0) {
234
0
            return _auto_partition_type_of_list(chars_list, offsets_list, is_const_args, null_list,
235
0
                                                res_data, res_offset, input_rows_count,
236
0
                                                argument_size, block, result, res);
237
0
        } else {
238
0
            return _auto_partition_type_of_range(chars_list, offsets_list, is_const_args, res_data,
239
0
                                                 res_offset, input_rows_count, argument_size, block,
240
0
                                                 result, res);
241
0
        }
242
0
        return Status::OK();
243
0
    }
244
245
private:
246
0
    std::u16string _string_to_u16string(const std::string& str) const {
247
0
        return boost::locale::conv::utf_to_utf<char16_t>(str);
248
0
    }
249
250
0
    std::string _string_to_unicode(const std::u16string& s) const {
251
0
        std::string res_s;
252
0
        res_s.reserve(s.size());
253
0
        if (s.length() > 0 && s[0] == '-') {
254
0
            res_s += '_';
255
0
        }
256
0
        for (int i = 0; i < s.length(); i++) {
257
0
            char16_t ch = s[i];
258
0
            if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')) {
259
0
                res_s += ch;
260
0
            } else {
261
0
                int unicodeValue = _get_code_point_at(s, i);
262
0
                res_s += fmt::format("{:02x}", static_cast<uint32_t>(unicodeValue));
263
0
            }
264
0
        }
265
0
        return res_s;
266
0
    }
267
268
0
    int _get_code_point_at(const std::u16string& str, std::size_t index) const {
269
0
        char16_t first = str[index];
270
        // [0xD800,0xDBFF] is the scope of the first code unit
271
0
        if ((first >= 0xD800 && first <= 0xDBFF) && (index + 1 < str.size())) {
272
0
            char16_t second = str[index + 1];
273
            // [0xDC00,0xDFFF] is the scope of the second code unit
274
0
            if (second >= 0xDC00 && second <= 0xDFFF) {
275
0
                return ((first - 0xD800) << 10) + (second - 0xDC00) + 0x10000;
276
0
            }
277
0
        }
278
279
0
        return first;
280
0
    }
281
    Status _auto_partition_type_of_list(std::vector<const ColumnString::Chars*>& chars_list,
282
                                        std::vector<const ColumnString::Offsets*>& offsets_list,
283
                                        std::vector<bool>& is_const_args,
284
                                        const std::vector<const ColumnUInt8::Container*>& null_list,
285
                                        auto& res_data, auto& res_offset, size_t input_rows_count,
286
                                        size_t argument_size, Block& block, uint32_t result,
287
0
                                        auto& res) const {
288
0
        int curr_len = 0;
289
0
        for (int row = 0; row < input_rows_count; row++) {
290
0
            std::string res_p;
291
0
            res_p.reserve(argument_size * 5);
292
0
            res_p += 'p';
293
0
            for (int col = 1; col < argument_size; col++) {
294
0
                const auto& current_offsets = *offsets_list[col];
295
0
                const auto& current_chars = *chars_list[col];
296
0
                const auto& current_nullmap = *null_list[col];
297
298
0
                if (current_nullmap[row]) {
299
0
                    res_p += 'X';
300
0
                } else {
301
0
                    auto idx = index_check_const(row, is_const_args[col]);
302
303
0
                    int size = current_offsets[idx] - current_offsets[idx - 1];
304
0
                    const char* raw_chars =
305
0
                            reinterpret_cast<const char*>(&current_chars[current_offsets[idx - 1]]);
306
                    // convert string to u16string in order to convert to unicode strings
307
0
                    const std::string raw_str(raw_chars, size);
308
0
                    auto u16string = _string_to_u16string(raw_str);
309
0
                    res_p += _string_to_unicode(u16string) + std::to_string(u16string.size());
310
0
                }
311
0
            }
312
313
            // check the length of the name: names longer than 50 chars are truncated and suffixed with a hash
314
0
            int len = res_p.size();
315
0
            if (len > 50) {
316
0
                res_p = std::format("{}_{:08x}", res_p.substr(0, 50), to_hash_code(res_p));
317
0
                len = res_p.size();
318
0
            }
319
0
            curr_len += len;
320
0
            res_data.resize(curr_len);
321
0
            memcpy(&res_data[res_offset[row - 1]], res_p.c_str(), len);
322
0
            res_offset[row] = res_offset[row - 1] + len;
323
0
        }
324
0
        block.get_by_position(result).column = std::move(res);
325
0
        return Status::OK();
326
0
    }
327
328
    size_t _copy_date_str_of_len_to_res_data(auto& res_data, auto& res_offset,
329
                                             std::vector<std::string>& date_str, size_t row,
330
0
                                             size_t len) const {
331
0
        size_t curr_len = 1;
332
0
        for (int j = 0; j < len; j++) {
333
0
            memcpy(&res_data[res_offset[row - 1]] + curr_len, date_str[j].c_str(),
334
0
                   date_str[j].size());
335
0
            curr_len += date_str[j].size();
336
0
        }
337
0
        return curr_len;
338
0
    }
339
340
    Status _auto_partition_type_of_range(std::vector<const ColumnString::Chars*>& chars_list,
341
                                         std::vector<const ColumnString::Offsets*>& offsets_list,
342
                                         std::vector<bool>& is_const_args, auto& res_data,
343
                                         auto& res_offset, size_t input_rows_count,
344
                                         size_t argument_size, Block& block, uint32_t result,
345
0
                                         auto& res) const {
346
0
        const char* range_type = chars_list[1]->raw_data();
347
348
0
        res_data.resize(15 * input_rows_count);
349
0
        for (int i = 0; i < input_rows_count; i++) {
350
0
            const auto& current_offsets = *offsets_list[2];
351
0
            const auto& current_chars = *chars_list[2];
352
353
0
            auto idx = index_check_const(i, is_const_args[2]);
354
0
            int size = current_offsets[idx] - current_offsets[idx - 1];
355
0
            const char* tmp =
356
0
                    reinterpret_cast<const char*>(&current_chars[current_offsets[idx - 1]]);
357
0
            std::string to_split_s(tmp, size);
358
359
            // check whether the string is a date or a datetime
360
0
            RE2 date_regex(R"(^\d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d{2})?$)");
361
0
            if (!RE2::FullMatch(to_split_s, date_regex)) {
362
0
                return Status::InvalidArgument("The range partition only support DATE|DATETIME");
363
0
            }
364
365
            // split date_str from (yyyy-mm-dd hh:mm:ss) to ([yyyy, mm, dd, hh, mm, ss])
366
0
            std::vector<std::string> date_str(6);
367
0
            date_str[0] = to_split_s.substr(0, 4);
368
0
            for (int ni = 5, j = 1; ni <= size; ni += 3, j++) {
369
0
                date_str[j] = to_split_s.substr(ni, 2);
370
0
            }
371
0
            int curr_len = 0;
372
373
0
            res_data[res_offset[i - 1]] = 'p';
374
            // raw => 2022-12-12 11:30:20
375
            // year => 2022 01 01 00 00 00
376
            // month => 2022 12 01 00 00 00
377
            // day => 2022 12 12 00 00 00
378
            // hour => 2022 12 12 11 00 00
379
            // minute => 2022 12 12 11 30 00
380
            // second => 2022 12 12 11 30 20
381
382
0
            if (!strncmp(range_type, "year", 4)) {
383
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 1);
384
0
                memcpy(&res_data[res_offset[i - 1]] + curr_len, "0101", 4);
385
0
                curr_len += 4;
386
0
            } else if (!strncmp(range_type, "month", 5)) {
387
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 2);
388
0
                memcpy(&res_data[res_offset[i - 1]] + curr_len, "01", 2);
389
0
                curr_len += 2;
390
0
            } else if (!strncmp(range_type, "day", 3)) {
391
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 3);
392
0
            } else if (!strncmp(range_type, "hour", 4)) {
393
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 4);
394
0
            } else if (!strncmp(range_type, "minute", 6)) {
395
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 5);
396
0
            } else if (!strncmp(range_type, "second", 6)) {
397
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 6);
398
0
            }
399
400
            // fill in zero
401
0
            int zero = 15 - curr_len;
402
0
            std::fill_n(&res_data[res_offset[i - 1]] + curr_len, zero, '0');
403
0
            curr_len += zero;
404
0
            res_offset[i] = res_offset[i - 1] + curr_len;
405
0
        }
406
0
        block.get_by_position(result).column = std::move(res);
407
0
        return Status::OK();
408
0
    }
409
410
0
    int32_t to_hash_code(const std::string& str) const {
411
0
        uint64_t h = 0;
412
0
        for (uint8_t c : str) {
413
0
            h = (h * 31U + c) & 0xFFFFFFFFU;
414
0
        }
415
0
        return static_cast<int32_t>(h);
416
0
    }
417
};
418
419
template <typename Impl>
420
class FunctionSubstring : public IFunction {
421
public:
422
    static constexpr auto name = SubstringUtil::name;
423
2
    String get_name() const override { return name; }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE8get_nameB5cxx11Ev
Line
Count
Source
423
1
    String get_name() const override { return name; }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE8get_nameB5cxx11Ev
Line
Count
Source
423
1
    String get_name() const override { return name; }
424
1.84k
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
_ZN5doris17FunctionSubstringINS_11Substr3ImplEE6createEv
Line
Count
Source
424
1.75k
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
_ZN5doris17FunctionSubstringINS_11Substr2ImplEE6createEv
Line
Count
Source
424
87
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
425
426
1.83k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
427
1.83k
        return std::make_shared<DataTypeString>();
428
1.83k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
426
1.75k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
427
1.75k
        return std::make_shared<DataTypeString>();
428
1.75k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
426
85
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
427
85
        return std::make_shared<DataTypeString>();
428
85
    }
429
1.83k
    DataTypes get_variadic_argument_types_impl() const override {
430
1.83k
        return Impl::get_variadic_argument_types();
431
1.83k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
429
1.75k
    DataTypes get_variadic_argument_types_impl() const override {
430
1.75k
        return Impl::get_variadic_argument_types();
431
1.75k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
429
86
    DataTypes get_variadic_argument_types_impl() const override {
430
86
        return Impl::get_variadic_argument_types();
431
86
    }
432
1.83k
    size_t get_number_of_arguments() const override {
433
1.83k
        return get_variadic_argument_types_impl().size();
434
1.83k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE23get_number_of_argumentsEv
Line
Count
Source
432
1.75k
    size_t get_number_of_arguments() const override {
433
1.75k
        return get_variadic_argument_types_impl().size();
434
1.75k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE23get_number_of_argumentsEv
Line
Count
Source
432
85
    size_t get_number_of_arguments() const override {
433
85
        return get_variadic_argument_types_impl().size();
434
85
    }
435
436
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
437
1.06k
                        uint32_t result, size_t input_rows_count) const override {
438
1.06k
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
439
1.06k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
437
1.00k
                        uint32_t result, size_t input_rows_count) const override {
438
1.00k
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
439
1.00k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
437
55
                        uint32_t result, size_t input_rows_count) const override {
438
55
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
439
55
    }
440
};
441
442
struct Substr3Impl {
443
1.75k
    static DataTypes get_variadic_argument_types() {
444
1.75k
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>(),
445
1.75k
                std::make_shared<DataTypeInt32>()};
446
1.75k
    }
447
448
    static Status execute_impl(FunctionContext* context, Block& block,
449
                               const ColumnNumbers& arguments, uint32_t result,
450
1.00k
                               size_t input_rows_count) {
451
1.00k
        SubstringUtil::substring_execute(block, arguments, result, input_rows_count);
452
1.00k
        return Status::OK();
453
1.00k
    }
454
};
455
456
struct Substr2Impl {
457
86
    static DataTypes get_variadic_argument_types() {
458
86
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>()};
459
86
    }
460
461
    static Status execute_impl(FunctionContext* context, Block& block,
462
                               const ColumnNumbers& arguments, uint32_t result,
463
55
                               size_t input_rows_count) {
464
55
        auto col_len = ColumnInt32::create(input_rows_count);
465
55
        auto& strlen_data = col_len->get_data();
466
467
55
        ColumnPtr str_col;
468
55
        bool str_const;
469
55
        std::tie(str_col, str_const) = unpack_if_const(block.get_by_position(arguments[0]).column);
470
471
55
        const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
472
473
55
        if (str_const) {
474
18
            std::fill(strlen_data.begin(), strlen_data.end(), str_offset[0] - str_offset[-1]);
475
37
        } else {
476
101
            for (int i = 0; i < input_rows_count; ++i) {
477
64
                strlen_data[i] = str_offset[i] - str_offset[i - 1];
478
64
            }
479
37
        }
480
481
        // we complete the column2(strlen) with the default value - each row's strlen.
482
55
        block.insert({std::move(col_len), std::make_shared<DataTypeInt32>(), "strlen"});
483
55
        ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1};
484
485
55
        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
486
55
        return Status::OK();
487
55
    }
488
};
489
490
template <bool Reverse>
491
class FunctionMaskPartial;
492
493
class FunctionMask : public IFunction {
494
public:
495
    static constexpr auto name = "mask";
496
    static constexpr unsigned char DEFAULT_UPPER_MASK = 'X';
497
    static constexpr unsigned char DEFAULT_LOWER_MASK = 'x';
498
    static constexpr unsigned char DEFAULT_NUMBER_MASK = 'n';
499
0
    String get_name() const override { return name; }
500
2
    static FunctionPtr create() { return std::make_shared<FunctionMask>(); }
501
502
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
503
0
        return std::make_shared<DataTypeString>();
504
0
    }
505
506
0
    size_t get_number_of_arguments() const override { return 0; }
507
508
0
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1, 2, 3}; }
509
510
1
    bool is_variadic() const override { return true; }
511
512
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
513
0
                        uint32_t result, size_t input_rows_count) const override {
514
0
        DCHECK_GE(arguments.size(), 1);
515
0
        DCHECK_LE(arguments.size(), 4);
516
517
0
        char upper = DEFAULT_UPPER_MASK, lower = DEFAULT_LOWER_MASK, number = DEFAULT_NUMBER_MASK;
518
519
0
        auto res = ColumnString::create();
520
0
        const auto& source_column =
521
0
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
522
523
0
        if (arguments.size() > 1) {
524
0
            const auto& col = *block.get_by_position(arguments[1]).column;
525
0
            auto string_ref = col.get_data_at(0);
526
0
            if (string_ref.size > 0) {
527
0
                upper = *string_ref.data;
528
0
            }
529
0
        }
530
531
0
        if (arguments.size() > 2) {
532
0
            const auto& col = *block.get_by_position(arguments[2]).column;
533
0
            auto string_ref = col.get_data_at(0);
534
0
            if (string_ref.size > 0) {
535
0
                lower = *string_ref.data;
536
0
            }
537
0
        }
538
539
0
        if (arguments.size() > 3) {
540
0
            const auto& col = *block.get_by_position(arguments[3]).column;
541
0
            auto string_ref = col.get_data_at(0);
542
0
            if (string_ref.size > 0) {
543
0
                number = *string_ref.data;
544
0
            }
545
0
        }
546
547
0
        if (arguments.size() > 4) {
548
0
            return Status::InvalidArgument(
549
0
                    fmt::format("too many arguments for function {}", get_name()));
550
0
        }
551
552
0
        vector_mask(source_column, *res, upper, lower, number);
553
554
0
        block.get_by_position(result).column = std::move(res);
555
556
0
        return Status::OK();
557
0
    }
558
    friend class FunctionMaskPartial<true>;
559
    friend class FunctionMaskPartial<false>;
560
561
private:
562
    static void vector_mask(const ColumnString& source, ColumnString& result, const char upper,
563
0
                            const char lower, const char number) {
564
0
        result.get_chars().resize(source.get_chars().size());
565
0
        result.get_offsets().resize(source.get_offsets().size());
566
0
        memcpy_small_allow_read_write_overflow15(
567
0
                result.get_offsets().data(), source.get_offsets().data(),
568
0
                source.get_offsets().size() * sizeof(ColumnString::Offset));
569
570
0
        const unsigned char* src = source.get_chars().data();
571
0
        const size_t size = source.get_chars().size();
572
0
        unsigned char* res = result.get_chars().data();
573
0
        mask(src, size, upper, lower, number, res);
574
0
    }
575
576
    static void mask(const unsigned char* __restrict src, const size_t size,
577
                     const unsigned char upper, const unsigned char lower,
578
0
                     const unsigned char number, unsigned char* __restrict res) {
579
0
        for (size_t i = 0; i != size; ++i) {
580
0
            auto c = src[i];
581
0
            if (c >= 'A' && c <= 'Z') {
582
0
                res[i] = upper;
583
0
            } else if (c >= 'a' && c <= 'z') {
584
0
                res[i] = lower;
585
0
            } else if (c >= '0' && c <= '9') {
586
0
                res[i] = number;
587
0
            } else {
588
0
                res[i] = c;
589
0
            }
590
0
        }
591
0
    }
592
};
593
594
template <bool Reverse>
595
class FunctionMaskPartial : public IFunction {
596
public:
597
    static constexpr auto name = Reverse ? "mask_last_n" : "mask_first_n";
598
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE8get_nameB5cxx11Ev
599
4
    static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); }
_ZN5doris19FunctionMaskPartialILb1EE6createEv
Line
Count
Source
599
2
    static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); }
_ZN5doris19FunctionMaskPartialILb0EE6createEv
Line
Count
Source
599
2
    static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); }
600
601
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
602
0
        return std::make_shared<DataTypeString>();
603
0
    }
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
604
605
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE23get_number_of_argumentsEv
606
607
2
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionMaskPartialILb1EE11is_variadicEv
Line
Count
Source
607
1
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionMaskPartialILb0EE11is_variadicEv
Line
Count
Source
607
1
    bool is_variadic() const override { return true; }
608
609
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
610
0
                        uint32_t result, size_t input_rows_count) const override {
611
0
        auto res = ColumnString::create();
612
0
        auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
613
0
        const auto& source_column = assert_cast<const ColumnString&>(*col);
614
615
0
        if (arguments.size() == 1) { // no 2nd arg, just mask all
616
0
            FunctionMask::vector_mask(source_column, *res, FunctionMask::DEFAULT_UPPER_MASK,
617
0
                                      FunctionMask::DEFAULT_LOWER_MASK,
618
0
                                      FunctionMask::DEFAULT_NUMBER_MASK);
619
0
        } else {
620
0
            const auto& [col_2nd, is_const] =
621
0
                    unpack_if_const(block.get_by_position(arguments[1]).column);
622
623
0
            const auto& col_n = assert_cast<const ColumnInt32&>(*col_2nd);
624
625
0
            if (is_const) {
626
0
                RETURN_IF_ERROR(vector<true>(source_column, col_n, *res));
627
0
            } else {
628
0
                RETURN_IF_ERROR(vector<false>(source_column, col_n, *res));
629
0
            }
630
0
        }
631
632
0
        block.get_by_position(result).column = std::move(res);
633
634
0
        return Status::OK();
635
0
    }
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
636
637
private:
638
    template <bool is_const>
639
0
    static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) {
640
0
        const auto num_rows = src.size();
641
0
        const auto* chars = src.get_chars().data();
642
0
        const auto* offsets = src.get_offsets().data();
643
0
        result.get_chars().resize(src.get_chars().size());
644
0
        result.get_offsets().resize(src.get_offsets().size());
645
0
        memcpy_small_allow_read_write_overflow15(
646
0
                result.get_offsets().data(), src.get_offsets().data(),
647
0
                src.get_offsets().size() * sizeof(ColumnString::Offset));
648
0
        auto* res = result.get_chars().data();
649
650
0
        const auto& col_n_data = col_n.get_data();
651
652
0
        for (ssize_t i = 0; i != num_rows; ++i) {
653
0
            auto offset = offsets[i - 1];
654
0
            int len = offsets[i] - offset;
655
0
            const int n = col_n_data[index_check_const<is_const>(i)];
656
657
0
            if (n < 0) [[unlikely]] {
658
0
                return Status::InvalidArgument(
659
0
                        "function {} only accept non-negative input for 2nd argument but got {}",
660
0
                        name, n);
661
0
            }
662
663
0
            if constexpr (Reverse) {
664
0
                auto start = std::max(len - n, 0);
665
0
                if (start > 0) {
666
0
                    memcpy(&res[offset], &chars[offset], start);
667
0
                }
668
0
                offset += start;
669
0
            } else {
670
0
                if (n < len) {
671
0
                    memcpy(&res[offset + n], &chars[offset + n], len - n);
672
0
                }
673
0
            }
674
675
0
            len = std::min(n, len);
676
0
            FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK,
677
0
                               FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK,
678
0
                               &res[offset]);
679
0
        }
680
681
0
        return Status::OK();
682
0
    }
Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb1EE6vectorILb1EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb1EE6vectorILb0EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb0EE6vectorILb1EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb0EE6vectorILb0EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
683
};
684
685
class FunctionLeft : public IFunction {
686
public:
687
    static constexpr auto name = "left";
688
168
    static FunctionPtr create() { return std::make_shared<FunctionLeft>(); }
689
1
    String get_name() const override { return name; }
690
166
    size_t get_number_of_arguments() const override { return 2; }
691
166
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
692
166
        return std::make_shared<DataTypeString>();
693
166
    }
694
695
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
696
133
                        uint32_t result, size_t input_rows_count) const override {
697
133
        DCHECK_EQ(arguments.size(), 2);
698
133
        auto res = ColumnString::create();
699
133
        bool col_const[2];
700
133
        ColumnPtr argument_columns[2];
701
399
        for (int i = 0; i < 2; ++i) {
702
266
            std::tie(argument_columns[i], col_const[i]) =
703
266
                    unpack_if_const(block.get_by_position(arguments[i]).column);
704
266
        }
705
706
133
        const auto& str_col = assert_cast<const ColumnString&>(*argument_columns[0]);
707
133
        const auto& len_col = assert_cast<const ColumnInt32&>(*argument_columns[1]);
708
133
        const auto is_ascii = str_col.is_ascii();
709
710
133
        std::visit(
711
133
                [&](auto is_ascii, auto str_const, auto len_const) {
712
133
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
133
                                                             input_rows_count);
714
133
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
711
9
                [&](auto is_ascii, auto str_const, auto len_const) {
712
9
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
9
                                                             input_rows_count);
714
9
                },
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
711
36
                [&](auto is_ascii, auto str_const, auto len_const) {
712
36
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
36
                                                             input_rows_count);
714
36
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
711
36
                [&](auto is_ascii, auto str_const, auto len_const) {
712
36
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
36
                                                             input_rows_count);
714
36
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
711
36
                [&](auto is_ascii, auto str_const, auto len_const) {
712
36
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
36
                                                             input_rows_count);
714
36
                },
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
711
8
                [&](auto is_ascii, auto str_const, auto len_const) {
712
8
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
8
                                                             input_rows_count);
714
8
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
711
8
                [&](auto is_ascii, auto str_const, auto len_const) {
712
8
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
8
                                                             input_rows_count);
714
8
                },
715
133
                make_bool_variant(is_ascii), make_bool_variant(col_const[0]),
716
133
                make_bool_variant(col_const[1]));
717
718
133
        block.get_by_position(result).column = std::move(res);
719
133
        return Status::OK();
720
133
    }
721
722
    template <bool is_ascii, bool str_const, bool len_const>
723
    static void _execute(const ColumnString& str_col, const ColumnInt32& len_col, ColumnString& res,
724
133
                         size_t size) {
725
133
        auto& res_chars = res.get_chars();
726
133
        auto& res_offsets = res.get_offsets();
727
133
        res_offsets.resize(size);
728
133
        const auto& len_data = len_col.get_data();
729
730
133
        if constexpr (str_const) {
731
44
            res_chars.reserve(size * (str_col.get_chars().size()));
732
89
        } else {
733
89
            res_chars.reserve(str_col.get_chars().size());
734
89
        }
735
736
320
        for (int i = 0; i < size; ++i) {
737
187
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
187
            int len = len_data[index_check_const<len_const>(i)];
739
187
            if (len <= 0 || str.empty()) {
740
55
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
55
                continue;
742
55
            }
743
744
132
            const char* begin = str.begin();
745
132
            const char* p = begin;
746
747
132
            if constexpr (is_ascii) {
748
78
                p = begin + std::min(len, static_cast<int>(str.size));
749
78
            } else {
750
54
                const char* end = str.end();
751
396
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
342
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
342
                }
754
54
            }
755
756
132
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
132
                                                                    res_offsets);
758
132
        }
759
133
    }
_ZN5doris12FunctionLeft8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
9
                         size_t size) {
725
9
        auto& res_chars = res.get_chars();
726
9
        auto& res_offsets = res.get_offsets();
727
9
        res_offsets.resize(size);
728
9
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
9
        } else {
733
9
            res_chars.reserve(str_col.get_chars().size());
734
9
        }
735
736
72
        for (int i = 0; i < size; ++i) {
737
63
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
63
            int len = len_data[index_check_const<len_const>(i)];
739
63
            if (len <= 0 || str.empty()) {
740
23
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
23
                continue;
742
23
            }
743
744
40
            const char* begin = str.begin();
745
40
            const char* p = begin;
746
747
            if constexpr (is_ascii) {
748
                p = begin + std::min(len, static_cast<int>(str.size));
749
40
            } else {
750
40
                const char* end = str.end();
751
314
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
274
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
274
                }
754
40
            }
755
756
40
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
40
                                                                    res_offsets);
758
40
        }
759
9
    }
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
_ZN5doris12FunctionLeft8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
36
                         size_t size) {
725
36
        auto& res_chars = res.get_chars();
726
36
        auto& res_offsets = res.get_offsets();
727
36
        res_offsets.resize(size);
728
36
        const auto& len_data = len_col.get_data();
729
730
36
        if constexpr (str_const) {
731
36
            res_chars.reserve(size * (str_col.get_chars().size()));
732
        } else {
733
            res_chars.reserve(str_col.get_chars().size());
734
        }
735
736
72
        for (int i = 0; i < size; ++i) {
737
36
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
36
            int len = len_data[index_check_const<len_const>(i)];
739
36
            if (len <= 0 || str.empty()) {
740
10
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
10
                continue;
742
10
            }
743
744
26
            const char* begin = str.begin();
745
26
            const char* p = begin;
746
747
26
            if constexpr (is_ascii) {
748
26
                p = begin + std::min(len, static_cast<int>(str.size));
749
            } else {
750
                const char* end = str.end();
751
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
                }
754
            }
755
756
26
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
26
                                                                    res_offsets);
758
26
        }
759
36
    }
_ZN5doris12FunctionLeft8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
36
                         size_t size) {
725
36
        auto& res_chars = res.get_chars();
726
36
        auto& res_offsets = res.get_offsets();
727
36
        res_offsets.resize(size);
728
36
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
36
        } else {
733
36
            res_chars.reserve(str_col.get_chars().size());
734
36
        }
735
736
72
        for (int i = 0; i < size; ++i) {
737
36
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
36
            int len = len_data[index_check_const<len_const>(i)];
739
36
            if (len <= 0 || str.empty()) {
740
10
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
10
                continue;
742
10
            }
743
744
26
            const char* begin = str.begin();
745
26
            const char* p = begin;
746
747
26
            if constexpr (is_ascii) {
748
26
                p = begin + std::min(len, static_cast<int>(str.size));
749
            } else {
750
                const char* end = str.end();
751
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
                }
754
            }
755
756
26
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
26
                                                                    res_offsets);
758
26
        }
759
36
    }
_ZN5doris12FunctionLeft8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
36
                         size_t size) {
725
36
        auto& res_chars = res.get_chars();
726
36
        auto& res_offsets = res.get_offsets();
727
36
        res_offsets.resize(size);
728
36
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
36
        } else {
733
36
            res_chars.reserve(str_col.get_chars().size());
734
36
        }
735
736
72
        for (int i = 0; i < size; ++i) {
737
36
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
36
            int len = len_data[index_check_const<len_const>(i)];
739
36
            if (len <= 0 || str.empty()) {
740
10
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
10
                continue;
742
10
            }
743
744
26
            const char* begin = str.begin();
745
26
            const char* p = begin;
746
747
26
            if constexpr (is_ascii) {
748
26
                p = begin + std::min(len, static_cast<int>(str.size));
749
            } else {
750
                const char* end = str.end();
751
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
                }
754
            }
755
756
26
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
26
                                                                    res_offsets);
758
26
        }
759
36
    }
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
_ZN5doris12FunctionLeft8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
8
                         size_t size) {
725
8
        auto& res_chars = res.get_chars();
726
8
        auto& res_offsets = res.get_offsets();
727
8
        res_offsets.resize(size);
728
8
        const auto& len_data = len_col.get_data();
729
730
8
        if constexpr (str_const) {
731
8
            res_chars.reserve(size * (str_col.get_chars().size()));
732
        } else {
733
            res_chars.reserve(str_col.get_chars().size());
734
        }
735
736
16
        for (int i = 0; i < size; ++i) {
737
8
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
8
            int len = len_data[index_check_const<len_const>(i)];
739
8
            if (len <= 0 || str.empty()) {
740
1
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
1
                continue;
742
1
            }
743
744
7
            const char* begin = str.begin();
745
7
            const char* p = begin;
746
747
            if constexpr (is_ascii) {
748
                p = begin + std::min(len, static_cast<int>(str.size));
749
7
            } else {
750
7
                const char* end = str.end();
751
41
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
34
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
34
                }
754
7
            }
755
756
7
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
7
                                                                    res_offsets);
758
7
        }
759
8
    }
_ZN5doris12FunctionLeft8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
8
                         size_t size) {
725
8
        auto& res_chars = res.get_chars();
726
8
        auto& res_offsets = res.get_offsets();
727
8
        res_offsets.resize(size);
728
8
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
8
        } else {
733
8
            res_chars.reserve(str_col.get_chars().size());
734
8
        }
735
736
16
        for (int i = 0; i < size; ++i) {
737
8
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
8
            int len = len_data[index_check_const<len_const>(i)];
739
8
            if (len <= 0 || str.empty()) {
740
1
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
1
                continue;
742
1
            }
743
744
7
            const char* begin = str.begin();
745
7
            const char* p = begin;
746
747
            if constexpr (is_ascii) {
748
                p = begin + std::min(len, static_cast<int>(str.size));
749
7
            } else {
750
7
                const char* end = str.end();
751
41
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
34
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
34
                }
754
7
            }
755
756
7
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
7
                                                                    res_offsets);
758
7
        }
759
8
    }
760
};
761
762
class FunctionRight : public IFunction {
763
public:
764
    static constexpr auto name = "right";
765
81
    static FunctionPtr create() { return std::make_shared<FunctionRight>(); }
766
1
    String get_name() const override { return name; }
767
79
    size_t get_number_of_arguments() const override { return 2; }
768
79
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
769
79
        return std::make_shared<DataTypeString>();
770
79
    }
771
772
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
773
61
                        uint32_t result, size_t input_rows_count) const override {
774
61
        auto int_type = std::make_shared<DataTypeInt32>();
775
61
        auto params1 = ColumnInt32::create(input_rows_count);
776
61
        auto params2 = ColumnInt32::create(input_rows_count);
777
61
        size_t num_columns_without_result = block.columns();
778
779
        // params1 = max(arg[1], -len(arg))
780
61
        auto& index_data = params1->get_data();
781
61
        auto& strlen_data = params2->get_data();
782
783
61
        auto str_col =
784
61
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
785
61
        const auto* str_column = assert_cast<const ColumnString*>(str_col.get());
786
61
        auto pos_col =
787
61
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
788
61
        const auto& pos_data = assert_cast<const ColumnInt32*>(pos_col.get())->get_data();
789
790
147
        for (int i = 0; i < input_rows_count; ++i) {
791
86
            auto str = str_column->get_data_at(i);
792
86
            strlen_data[i] = simd::VStringFunctions::get_char_len(str.data, str.size);
793
86
        }
794
795
147
        for (int i = 0; i < input_rows_count; ++i) {
796
86
            index_data[i] = std::max(-pos_data[i], -strlen_data[i]);
797
86
        }
798
799
61
        block.insert({std::move(params1), int_type, "index"});
800
61
        block.insert({std::move(params2), int_type, "strlen"});
801
802
61
        ColumnNumbers temp_arguments(3);
803
61
        temp_arguments[0] = arguments[0];
804
61
        temp_arguments[1] = num_columns_without_result;
805
61
        temp_arguments[2] = num_columns_without_result + 1;
806
61
        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
807
61
        return Status::OK();
808
61
    }
809
};
810
811
struct NullOrEmptyImpl {
812
0
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeUInt8>()}; }
813
814
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
815
5
                          uint32_t result, size_t input_rows_count, bool reverse) {
816
5
        auto res_map = ColumnUInt8::create(input_rows_count, 0);
817
818
5
        auto column = block.get_by_position(arguments[0]).column;
819
5
        if (auto* nullable = check_and_get_column<const ColumnNullable>(*column)) {
820
5
            column = nullable->get_nested_column_ptr();
821
5
            VectorizedUtils::update_null_map(res_map->get_data(), nullable->get_null_map_data());
822
5
        }
823
5
        auto str_col = assert_cast<const ColumnString*>(column.get());
824
5
        const auto& offsets = str_col->get_offsets();
825
826
5
        auto& res_map_data = res_map->get_data();
827
13
        for (int i = 0; i < input_rows_count; ++i) {
828
8
            int size = offsets[i] - offsets[i - 1];
829
8
            res_map_data[i] |= (size == 0);
830
8
        }
831
5
        if (reverse) {
832
0
            for (int i = 0; i < input_rows_count; ++i) {
833
0
                res_map_data[i] = !res_map_data[i];
834
0
            }
835
0
        }
836
837
5
        block.replace_by_position(result, std::move(res_map));
838
5
        return Status::OK();
839
5
    }
840
};
841
842
class FunctionNullOrEmpty : public IFunction {
843
public:
844
    static constexpr auto name = "null_or_empty";
845
7
    static FunctionPtr create() { return std::make_shared<FunctionNullOrEmpty>(); }
846
1
    String get_name() const override { return name; }
847
5
    size_t get_number_of_arguments() const override { return 1; }
848
849
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
850
5
        return std::make_shared<DataTypeUInt8>();
851
5
    }
852
853
10
    bool use_default_implementation_for_nulls() const override { return false; }
854
855
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
856
5
                        uint32_t result, size_t input_rows_count) const override {
857
5
        RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result,
858
5
                                                 input_rows_count, false));
859
5
        return Status::OK();
860
5
    }
861
};
862
863
class FunctionNotNullOrEmpty : public IFunction {
864
public:
865
    static constexpr auto name = "not_null_or_empty";
866
2
    static FunctionPtr create() { return std::make_shared<FunctionNotNullOrEmpty>(); }
867
1
    String get_name() const override { return name; }
868
0
    size_t get_number_of_arguments() const override { return 1; }
869
870
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
871
0
        return std::make_shared<DataTypeUInt8>();
872
0
    }
873
874
0
    bool use_default_implementation_for_nulls() const override { return false; }
875
876
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
877
0
                        uint32_t result, size_t input_rows_count) const override {
878
0
        RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result,
879
0
                                                 input_rows_count, true));
880
0
        return Status::OK();
881
0
    }
882
};
883
884
class FunctionStringConcat : public IFunction {
885
public:
886
    struct ConcatState {
887
        bool use_state = false;
888
        std::string tail;
889
    };
890
891
    static constexpr auto name = "concat";
892
365
    static FunctionPtr create() { return std::make_shared<FunctionStringConcat>(); }
893
0
    String get_name() const override { return name; }
894
0
    size_t get_number_of_arguments() const override { return 0; }
895
364
    bool is_variadic() const override { return true; }
896
897
363
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
898
363
        return std::make_shared<DataTypeString>();
899
363
    }
900
901
727
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
902
727
        if (scope == FunctionContext::THREAD_LOCAL) {
903
363
            return Status::OK();
904
363
        }
905
364
        std::shared_ptr<ConcatState> state = std::make_shared<ConcatState>();
906
907
364
        context->set_function_state(scope, state);
908
909
364
        state->use_state = true;
910
911
        // Optimize function calls like this:
912
        // concat(col, "123", "abc", "456") -> tail = "123abc456"
913
580
        for (size_t i = 1; i < context->get_num_args(); i++) {
914
383
            const auto* column_string = context->get_constant_col(i);
915
383
            if (column_string == nullptr) {
916
139
                state->use_state = false;
917
139
                return IFunction::open(context, scope);
918
139
            }
919
244
            auto string_vale = column_string->column_ptr->get_data_at(0);
920
244
            if (string_vale.data == nullptr) {
921
                // For concat(col, null), it is handled by default_implementation_for_nulls
922
28
                state->use_state = false;
923
28
                return IFunction::open(context, scope);
924
28
            }
925
926
216
            state->tail.append(string_vale.begin(), string_vale.size);
927
216
        }
928
929
        // The reserve is used here to allow the usage of memcpy_small_allow_read_write_overflow15 below.
930
197
        state->tail.reserve(state->tail.size() + 16);
931
932
197
        return IFunction::open(context, scope);
933
364
    }
934
935
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
936
292
                        uint32_t result, size_t input_rows_count) const override {
937
292
        DCHECK_GE(arguments.size(), 1);
938
939
292
        if (arguments.size() == 1) {
940
3
            block.get_by_position(result).column = block.get_by_position(arguments[0]).column;
941
3
            return Status::OK();
942
3
        }
943
289
        auto* concat_state = reinterpret_cast<ConcatState*>(
944
289
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
945
289
        if (!concat_state) {
946
0
            return Status::RuntimeError("funciton context for function '{}' must have ConcatState;",
947
0
                                        get_name());
948
0
        }
949
289
        if (concat_state->use_state) {
950
175
            const auto& [col, is_const] =
951
175
                    unpack_if_const(block.get_by_position(arguments[0]).column);
952
175
            const auto* col_str = assert_cast<const ColumnString*>(col.get());
953
175
            if (is_const) {
954
0
                return execute_const<true>(concat_state, block, col_str, result, input_rows_count);
955
175
            } else {
956
175
                return execute_const<false>(concat_state, block, col_str, result, input_rows_count);
957
175
            }
958
959
175
        } else {
960
114
            return execute_vecotr(block, arguments, result, input_rows_count);
961
114
        }
962
289
    }
963
964
    Status execute_vecotr(Block& block, const ColumnNumbers& arguments, uint32_t result,
965
114
                          size_t input_rows_count) const {
966
114
        int argument_size = arguments.size();
967
114
        std::vector<ColumnPtr> argument_columns(argument_size);
968
969
114
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
970
114
        std::vector<const ColumnString::Chars*> chars_list(argument_size);
971
114
        std::vector<bool> is_const_args(argument_size);
972
973
374
        for (int i = 0; i < argument_size; ++i) {
974
260
            const auto& [col, is_const] =
975
260
                    unpack_if_const(block.get_by_position(arguments[i]).column);
976
977
260
            const auto* col_str = assert_cast<const ColumnString*>(col.get());
978
260
            offsets_list[i] = &col_str->get_offsets();
979
260
            chars_list[i] = &col_str->get_chars();
980
260
            is_const_args[i] = is_const;
981
260
        }
982
983
114
        auto res = ColumnString::create();
984
114
        auto& res_data = res->get_chars();
985
114
        auto& res_offset = res->get_offsets();
986
987
114
        res_offset.resize(input_rows_count);
988
114
        size_t res_reserve_size = 0;
989
374
        for (size_t i = 0; i < argument_size; ++i) {
990
260
            if (is_const_args[i]) {
991
123
                res_reserve_size += (*offsets_list[i])[0] * input_rows_count;
992
137
            } else {
993
137
                res_reserve_size += (*offsets_list[i])[input_rows_count - 1];
994
137
            }
995
260
        }
996
997
114
        ColumnString::check_chars_length(res_reserve_size, 0);
998
999
114
        res_data.resize(res_reserve_size);
1000
1001
114
        auto* data = res_data.data();
1002
114
        size_t dst_offset = 0;
1003
1004
333
        for (size_t i = 0; i < input_rows_count; ++i) {
1005
695
            for (size_t j = 0; j < argument_size; ++j) {
1006
476
                const auto& current_offsets = *offsets_list[j];
1007
476
                const auto& current_chars = *chars_list[j];
1008
476
                auto idx = index_check_const(i, is_const_args[j]);
1009
476
                const auto size = current_offsets[idx] - current_offsets[idx - 1];
1010
476
                if (size > 0) {
1011
386
                    memcpy_small_allow_read_write_overflow15(
1012
386
                            data + dst_offset, current_chars.data() + current_offsets[idx - 1],
1013
386
                            size);
1014
386
                    dst_offset += size;
1015
386
                }
1016
476
            }
1017
219
            res_offset[i] = dst_offset;
1018
219
        }
1019
1020
114
        block.get_by_position(result).column = std::move(res);
1021
114
        return Status::OK();
1022
114
    }
1023
1024
    template <bool is_const>
1025
    Status execute_const(ConcatState* concat_state, Block& block, const ColumnString* col_str,
1026
175
                         uint32_t result, size_t input_rows_count) const {
1027
        // using tail optimize
1028
1029
175
        auto res = ColumnString::create();
1030
175
        auto& res_data = res->get_chars();
1031
175
        auto& res_offset = res->get_offsets();
1032
175
        res_offset.resize(input_rows_count);
1033
1034
175
        size_t res_reserve_size = 0;
1035
175
        if constexpr (is_const) {
1036
0
            res_reserve_size = col_str->get_offsets()[0] * input_rows_count;
1037
175
        } else {
1038
175
            res_reserve_size = col_str->get_offsets()[input_rows_count - 1];
1039
175
        }
1040
175
        res_reserve_size += concat_state->tail.size() * input_rows_count;
1041
1042
175
        ColumnString::check_chars_length(res_reserve_size, 0);
1043
175
        res_data.resize(res_reserve_size);
1044
1045
175
        const auto& tail = concat_state->tail;
1046
175
        auto* data = res_data.data();
1047
175
        size_t dst_offset = 0;
1048
1049
352
        for (size_t i = 0; i < input_rows_count; ++i) {
1050
177
            const auto idx = index_check_const<is_const>(i);
1051
177
            StringRef str_val = col_str->get_data_at(idx);
1052
            // copy column
1053
177
            memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size);
1054
177
            dst_offset += str_val.size;
1055
            // copy tail
1056
177
            memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size());
1057
177
            dst_offset += tail.size();
1058
177
            res_offset[i] = dst_offset;
1059
177
        }
1060
175
        block.get_by_position(result).column = std::move(res);
1061
175
        return Status::OK();
1062
175
    }
Unexecuted instantiation: _ZNK5doris20FunctionStringConcat13execute_constILb1EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm
_ZNK5doris20FunctionStringConcat13execute_constILb0EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm
Line
Count
Source
1026
175
                         uint32_t result, size_t input_rows_count) const {
1027
        // using tail optimize
1028
1029
175
        auto res = ColumnString::create();
1030
175
        auto& res_data = res->get_chars();
1031
175
        auto& res_offset = res->get_offsets();
1032
175
        res_offset.resize(input_rows_count);
1033
1034
175
        size_t res_reserve_size = 0;
1035
        if constexpr (is_const) {
1036
            res_reserve_size = col_str->get_offsets()[0] * input_rows_count;
1037
175
        } else {
1038
175
            res_reserve_size = col_str->get_offsets()[input_rows_count - 1];
1039
175
        }
1040
175
        res_reserve_size += concat_state->tail.size() * input_rows_count;
1041
1042
175
        ColumnString::check_chars_length(res_reserve_size, 0);
1043
175
        res_data.resize(res_reserve_size);
1044
1045
175
        const auto& tail = concat_state->tail;
1046
175
        auto* data = res_data.data();
1047
175
        size_t dst_offset = 0;
1048
1049
352
        for (size_t i = 0; i < input_rows_count; ++i) {
1050
177
            const auto idx = index_check_const<is_const>(i);
1051
177
            StringRef str_val = col_str->get_data_at(idx);
1052
            // copy column
1053
177
            memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size);
1054
177
            dst_offset += str_val.size;
1055
            // copy tail
1056
177
            memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size());
1057
177
            dst_offset += tail.size();
1058
177
            res_offset[i] = dst_offset;
1059
177
        }
1060
175
        block.get_by_position(result).column = std::move(res);
1061
175
        return Status::OK();
1062
175
    }
1063
};
1064
1065
class FunctionStringElt : public IFunction {
1066
public:
1067
    static constexpr auto name = "elt";
1068
360
    static FunctionPtr create() { return std::make_shared<FunctionStringElt>(); }
1069
0
    String get_name() const override { return name; }
1070
0
    size_t get_number_of_arguments() const override { return 0; }
1071
359
    bool is_variadic() const override { return true; }
1072
1073
358
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1074
358
        return make_nullable(std::make_shared<DataTypeString>());
1075
358
    }
1076
716
    bool use_default_implementation_for_nulls() const override { return false; }
1077
1078
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1079
358
                        uint32_t result, size_t input_rows_count) const override {
1080
358
        int arguent_size = arguments.size();
1081
358
        int num_children = arguent_size - 1;
1082
358
        auto res = ColumnString::create();
1083
1084
358
        if (auto const_column = check_and_get_column<ColumnConst>(
1085
358
                    *block.get_by_position(arguments[0]).column)) {
1086
153
            auto data = const_column->get_data_at(0);
1087
            // return NULL, pos is null or pos < 0 or pos > num_children
1088
153
            auto is_null = data.data == nullptr;
1089
153
            auto pos = is_null ? 0 : *(Int32*)data.data;
1090
153
            is_null = pos <= 0 || pos > num_children;
1091
1092
153
            auto null_map = ColumnUInt8::create(input_rows_count, is_null);
1093
153
            if (is_null) {
1094
135
                res->insert_many_defaults(input_rows_count);
1095
135
            } else {
1096
18
                auto& target_column = block.get_by_position(arguments[pos]).column;
1097
18
                if (auto target_const_column = check_and_get_column<ColumnConst>(*target_column)) {
1098
6
                    auto target_data = target_const_column->get_data_at(0);
1099
                    // return NULL, no target data
1100
6
                    if (target_data.data == nullptr) {
1101
0
                        null_map = ColumnUInt8::create(input_rows_count, true);
1102
0
                        res->insert_many_defaults(input_rows_count);
1103
6
                    } else {
1104
6
                        res->insert_data_repeatedly(target_data.data, target_data.size,
1105
6
                                                    input_rows_count);
1106
6
                    }
1107
12
                } else if (auto target_nullable_column =
1108
12
                                   check_and_get_column<ColumnNullable>(*target_column)) {
1109
12
                    auto& target_null_map = target_nullable_column->get_null_map_data();
1110
12
                    VectorizedUtils::update_null_map(
1111
12
                            assert_cast<ColumnUInt8&>(*null_map).get_data(), target_null_map);
1112
1113
12
                    auto& target_str_column = assert_cast<const ColumnString&>(
1114
12
                            target_nullable_column->get_nested_column());
1115
12
                    res->get_chars().assign(target_str_column.get_chars().begin(),
1116
12
                                            target_str_column.get_chars().end());
1117
12
                    res->get_offsets().assign(target_str_column.get_offsets().begin(),
1118
12
                                              target_str_column.get_offsets().end());
1119
12
                } else {
1120
0
                    auto& target_str_column = assert_cast<const ColumnString&>(*target_column);
1121
0
                    res->get_chars().assign(target_str_column.get_chars().begin(),
1122
0
                                            target_str_column.get_chars().end());
1123
0
                    res->get_offsets().assign(target_str_column.get_offsets().begin(),
1124
0
                                              target_str_column.get_offsets().end());
1125
0
                }
1126
18
            }
1127
153
            block.get_by_position(result).column =
1128
153
                    ColumnNullable::create(std::move(res), std::move(null_map));
1129
205
        } else if (auto pos_null_column = check_and_get_column<ColumnNullable>(
1130
205
                           *block.get_by_position(arguments[0]).column)) {
1131
205
            auto& pos_column =
1132
205
                    assert_cast<const ColumnInt32&>(pos_null_column->get_nested_column());
1133
205
            auto& pos_null_map = pos_null_column->get_null_map_data();
1134
205
            auto null_map = ColumnUInt8::create(input_rows_count, false);
1135
205
            auto& res_null_map = assert_cast<ColumnUInt8&>(*null_map).get_data();
1136
1137
460
            for (size_t i = 0; i < input_rows_count; ++i) {
1138
255
                auto pos = pos_column.get_element(i);
1139
255
                res_null_map[i] =
1140
255
                        pos_null_map[i] || pos <= 0 || pos > num_children ||
1141
255
                        block.get_by_position(arguments[pos]).column->get_data_at(i).data ==
1142
30
                                nullptr;
1143
255
                if (res_null_map[i]) {
1144
225
                    res->insert_default();
1145
225
                } else {
1146
30
                    auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i);
1147
30
                    res->insert_data(insert_data.data, insert_data.size);
1148
30
                }
1149
255
            }
1150
205
            block.get_by_position(result).column =
1151
205
                    ColumnNullable::create(std::move(res), std::move(null_map));
1152
205
        } else {
1153
0
            auto& pos_column =
1154
0
                    assert_cast<const ColumnInt32&>(*block.get_by_position(arguments[0]).column);
1155
0
            auto null_map = ColumnUInt8::create(input_rows_count, false);
1156
0
            auto& res_null_map = assert_cast<ColumnUInt8&>(*null_map).get_data();
1157
1158
0
            for (size_t i = 0; i < input_rows_count; ++i) {
1159
0
                auto pos = pos_column.get_element(i);
1160
0
                res_null_map[i] =
1161
0
                        pos <= 0 || pos > num_children ||
1162
0
                        block.get_by_position(arguments[pos]).column->get_data_at(i).data ==
1163
0
                                nullptr;
1164
0
                if (res_null_map[i]) {
1165
0
                    res->insert_default();
1166
0
                } else {
1167
0
                    auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i);
1168
0
                    res->insert_data(insert_data.data, insert_data.size);
1169
0
                }
1170
0
            }
1171
0
            block.get_by_position(result).column =
1172
0
                    ColumnNullable::create(std::move(res), std::move(null_map));
1173
0
        }
1174
358
        return Status::OK();
1175
358
    }
1176
};
1177
1178
// concat_ws (string,string....) or (string, Array)
1179
// TODO: avoid using fmtlib
1180
class FunctionStringConcatWs : public IFunction {
1181
public:
1182
    using Chars = ColumnString::Chars;
1183
    using Offsets = ColumnString::Offsets;
1184
1185
    static constexpr auto name = "concat_ws";
1186
451
    static FunctionPtr create() { return std::make_shared<FunctionStringConcatWs>(); }
1187
0
    String get_name() const override { return name; }
1188
0
    size_t get_number_of_arguments() const override { return 0; }
1189
450
    bool is_variadic() const override { return true; }
1190
1191
449
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1192
449
        const IDataType* first_type = arguments[0].get();
1193
449
        if (first_type->is_nullable()) {
1194
449
            return make_nullable(std::make_shared<DataTypeString>());
1195
449
        } else {
1196
0
            return std::make_shared<DataTypeString>();
1197
0
        }
1198
449
    }
1199
898
    bool use_default_implementation_for_nulls() const override { return false; }
1200
1201
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1202
449
                        uint32_t result, size_t input_rows_count) const override {
1203
449
        DCHECK_GE(arguments.size(), 2);
1204
449
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1205
        // we create a zero column to simply implement
1206
449
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
1207
449
        auto res = ColumnString::create();
1208
449
        bool is_null_type = block.get_by_position(arguments[0]).type.get()->is_nullable();
1209
449
        size_t argument_size = arguments.size();
1210
449
        std::vector<const Offsets*> offsets_list(argument_size);
1211
449
        std::vector<const Chars*> chars_list(argument_size);
1212
449
        std::vector<const ColumnUInt8::Container*> null_list(argument_size);
1213
1214
449
        std::vector<ColumnPtr> argument_columns(argument_size);
1215
449
        std::vector<ColumnPtr> argument_null_columns(argument_size);
1216
1217
1.53k
        for (size_t i = 0; i < argument_size; ++i) {
1218
1.08k
            argument_columns[i] =
1219
1.08k
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
1220
1.08k
            if (const auto* nullable =
1221
1.08k
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
1222
                // Danger: Here must dispose the null map data first! Because
1223
                // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
1224
                // of column nullable mem of null map
1225
1.08k
                null_list[i] = &nullable->get_null_map_data();
1226
1.08k
                argument_null_columns[i] = nullable->get_null_map_column_ptr();
1227
1.08k
                argument_columns[i] = nullable->get_nested_column_ptr();
1228
1.08k
            } else {
1229
0
                null_list[i] = &const_null_map->get_data();
1230
0
            }
1231
1232
1.08k
            if (is_column<ColumnArray>(argument_columns[i].get())) {
1233
36
                continue;
1234
36
            }
1235
1236
1.05k
            const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
1237
1.05k
            offsets_list[i] = &col_str->get_offsets();
1238
1.05k
            chars_list[i] = &col_str->get_chars();
1239
1.05k
        }
1240
1241
449
        auto& res_data = res->get_chars();
1242
449
        auto& res_offset = res->get_offsets();
1243
449
        res_offset.resize(input_rows_count);
1244
1245
449
        VectorizedUtils::update_null_map(null_map->get_data(), *null_list[0]);
1246
449
        fmt::memory_buffer buffer;
1247
449
        std::vector<std::string_view> views;
1248
1249
449
        if (is_column<ColumnArray>(argument_columns[1].get())) {
1250
            // Determine if the nested type of the array is String
1251
36
            const auto& array_column = reinterpret_cast<const ColumnArray&>(*argument_columns[1]);
1252
36
            if (!array_column.get_data().is_column_string()) {
1253
0
                return Status::NotSupported(
1254
0
                        fmt::format("unsupported nested array of type {} for function {}",
1255
0
                                    is_column_nullable(array_column.get_data())
1256
0
                                            ? array_column.get_data().get_name()
1257
0
                                            : array_column.get_data().get_name(),
1258
0
                                    get_name()));
1259
0
            }
1260
            // Concat string in array
1261
36
            _execute_array(input_rows_count, array_column, buffer, views, offsets_list, chars_list,
1262
36
                           null_list, res_data, res_offset);
1263
1264
413
        } else {
1265
            // Concat string
1266
413
            _execute_string(input_rows_count, argument_size, buffer, views, offsets_list,
1267
413
                            chars_list, null_list, res_data, res_offset);
1268
413
        }
1269
449
        if (is_null_type) {
1270
449
            block.get_by_position(result).column =
1271
449
                    ColumnNullable::create(std::move(res), std::move(null_map));
1272
449
        } else {
1273
0
            block.get_by_position(result).column = std::move(res);
1274
0
        }
1275
449
        return Status::OK();
1276
449
    }
1277
1278
private:
1279
    void _execute_array(const size_t& input_rows_count, const ColumnArray& array_column,
1280
                        fmt::memory_buffer& buffer, std::vector<std::string_view>& views,
1281
                        const std::vector<const Offsets*>& offsets_list,
1282
                        const std::vector<const Chars*>& chars_list,
1283
                        const std::vector<const ColumnUInt8::Container*>& null_list,
1284
36
                        Chars& res_data, Offsets& res_offset) const {
1285
        // Get array nested column
1286
36
        const UInt8* array_nested_null_map = nullptr;
1287
36
        ColumnPtr array_nested_column = nullptr;
1288
1289
36
        if (is_column_nullable(array_column.get_data())) {
1290
36
            const auto& array_nested_null_column =
1291
36
                    reinterpret_cast<const ColumnNullable&>(array_column.get_data());
1292
            // String's null map in array
1293
36
            array_nested_null_map =
1294
36
                    array_nested_null_column.get_null_map_column().get_data().data();
1295
36
            array_nested_column = array_nested_null_column.get_nested_column_ptr();
1296
36
        } else {
1297
0
            array_nested_column = array_column.get_data_ptr();
1298
0
        }
1299
1300
36
        const auto& string_column = reinterpret_cast<const ColumnString&>(*array_nested_column);
1301
36
        const Chars& string_src_chars = string_column.get_chars();
1302
36
        const auto& src_string_offsets = string_column.get_offsets();
1303
36
        const auto& src_array_offsets = array_column.get_offsets();
1304
36
        size_t current_src_array_offset = 0;
1305
1306
        // Concat string in array
1307
76
        for (size_t i = 0; i < input_rows_count; ++i) {
1308
40
            auto& sep_offsets = *offsets_list[0];
1309
40
            auto& sep_chars = *chars_list[0];
1310
40
            auto& sep_nullmap = *null_list[0];
1311
1312
40
            if (sep_nullmap[i]) {
1313
8
                res_offset[i] = res_data.size();
1314
8
                current_src_array_offset += src_array_offsets[i] - src_array_offsets[i - 1];
1315
8
                continue;
1316
8
            }
1317
1318
32
            int sep_size = sep_offsets[i] - sep_offsets[i - 1];
1319
32
            const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]);
1320
1321
32
            std::string_view sep(sep_data, sep_size);
1322
32
            buffer.clear();
1323
32
            views.clear();
1324
1325
32
            for (auto next_src_array_offset = src_array_offsets[i];
1326
128
                 current_src_array_offset < next_src_array_offset; ++current_src_array_offset) {
1327
96
                const auto current_src_string_offset =
1328
96
                        current_src_array_offset ? src_string_offsets[current_src_array_offset - 1]
1329
96
                                                 : 0;
1330
96
                size_t bytes_to_copy =
1331
96
                        src_string_offsets[current_src_array_offset] - current_src_string_offset;
1332
96
                const char* ptr =
1333
96
                        reinterpret_cast<const char*>(&string_src_chars[current_src_string_offset]);
1334
1335
96
                if (array_nested_null_map == nullptr ||
1336
96
                    !array_nested_null_map[current_src_array_offset]) {
1337
96
                    views.emplace_back(ptr, bytes_to_copy);
1338
96
                }
1339
96
            }
1340
1341
32
            fmt::format_to(buffer, "{}", fmt::join(views, sep));
1342
1343
32
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1344
32
                                        res_offset);
1345
32
        }
1346
36
    }
1347
1348
    void _execute_string(const size_t& input_rows_count, const size_t& argument_size,
1349
                         fmt::memory_buffer& buffer, std::vector<std::string_view>& views,
1350
                         const std::vector<const Offsets*>& offsets_list,
1351
                         const std::vector<const Chars*>& chars_list,
1352
                         const std::vector<const ColumnUInt8::Container*>& null_list,
1353
413
                         Chars& res_data, Offsets& res_offset) const {
1354
        // Concat string
1355
933
        for (size_t i = 0; i < input_rows_count; ++i) {
1356
520
            auto& sep_offsets = *offsets_list[0];
1357
520
            auto& sep_chars = *chars_list[0];
1358
520
            auto& sep_nullmap = *null_list[0];
1359
520
            if (sep_nullmap[i]) {
1360
72
                res_offset[i] = res_data.size();
1361
72
                continue;
1362
72
            }
1363
1364
448
            int sep_size = sep_offsets[i] - sep_offsets[i - 1];
1365
448
            const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]);
1366
1367
448
            std::string_view sep(sep_data, sep_size);
1368
448
            buffer.clear();
1369
448
            views.clear();
1370
1.04k
            for (size_t j = 1; j < argument_size; ++j) {
1371
600
                auto& current_offsets = *offsets_list[j];
1372
600
                auto& current_chars = *chars_list[j];
1373
600
                auto& current_nullmap = *null_list[j];
1374
600
                int size = current_offsets[i] - current_offsets[i - 1];
1375
600
                const char* ptr =
1376
600
                        reinterpret_cast<const char*>(&current_chars[current_offsets[i - 1]]);
1377
600
                if (!current_nullmap[i]) {
1378
548
                    views.emplace_back(ptr, size);
1379
548
                }
1380
600
            }
1381
448
            fmt::format_to(buffer, "{}", fmt::join(views, sep));
1382
448
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1383
448
                                        res_offset);
1384
448
        }
1385
413
    }
1386
};
1387
1388
class FunctionStringRepeat : public IFunction {
1389
public:
1390
    static constexpr auto name = "repeat";
1391
183
    static FunctionPtr create() { return std::make_shared<FunctionStringRepeat>(); }
1392
1
    String get_name() const override { return name; }
1393
181
    size_t get_number_of_arguments() const override { return 2; }
1394
    // should set NULL value of nested data to default,
1395
    // as iff it's not inited and invalid, the repeat result of length is so large cause overflow
1396
163
    bool need_replace_null_data_to_default() const override { return true; }
1397
1398
181
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1399
181
        return make_nullable(std::make_shared<DataTypeString>());
1400
181
    }
1401
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1402
163
                        uint32_t result, size_t input_rows_count) const override {
1403
163
        DCHECK_EQ(arguments.size(), 2);
1404
163
        auto res = ColumnString::create();
1405
163
        auto null_map = ColumnUInt8::create();
1406
1407
163
        ColumnPtr argument_ptr[2];
1408
163
        argument_ptr[0] =
1409
163
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1410
163
        argument_ptr[1] = block.get_by_position(arguments[1]).column;
1411
1412
163
        if (const auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) {
1413
163
            if (const auto* col2 = check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
1414
109
                RETURN_IF_ERROR(vector_vector(col1->get_chars(), col1->get_offsets(),
1415
109
                                              col2->get_data(), res->get_chars(),
1416
109
                                              res->get_offsets(), null_map->get_data()));
1417
109
                block.replace_by_position(
1418
109
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1419
109
                return Status::OK();
1420
109
            } else if (const auto* col2_const =
1421
54
                               check_and_get_column<ColumnConst>(*argument_ptr[1])) {
1422
54
                DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column()));
1423
54
                int repeat = col2_const->get_int(0);
1424
54
                if (repeat <= 0) {
1425
18
                    null_map->get_data().resize_fill(input_rows_count, 0);
1426
18
                    res->insert_many_defaults(input_rows_count);
1427
36
                } else {
1428
36
                    vector_const(col1->get_chars(), col1->get_offsets(), repeat, res->get_chars(),
1429
36
                                 res->get_offsets(), null_map->get_data());
1430
36
                }
1431
54
                block.replace_by_position(
1432
54
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1433
54
                return Status::OK();
1434
54
            }
1435
163
        }
1436
1437
0
        return Status::RuntimeError("repeat function get error param: {}, {}",
1438
0
                                    argument_ptr[0]->get_name(), argument_ptr[1]->get_name());
1439
163
    }
1440
1441
    Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1442
                         const ColumnInt32::Container& repeats, ColumnString::Chars& res_data,
1443
                         ColumnString::Offsets& res_offsets,
1444
109
                         ColumnUInt8::Container& null_map) const {
1445
109
        size_t input_row_size = offsets.size();
1446
1447
109
        fmt::memory_buffer buffer;
1448
109
        res_offsets.resize(input_row_size);
1449
109
        null_map.resize_fill(input_row_size, 0);
1450
277
        for (ssize_t i = 0; i < input_row_size; ++i) {
1451
168
            buffer.clear();
1452
168
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1453
168
            size_t size = offsets[i] - offsets[i - 1];
1454
168
            int repeat = repeats[i];
1455
168
            if (repeat <= 0) {
1456
56
                StringOP::push_empty_string(i, res_data, res_offsets);
1457
112
            } else {
1458
112
                ColumnString::check_chars_length(repeat * size + res_data.size(), 0);
1459
644
                for (int j = 0; j < repeat; ++j) {
1460
532
                    buffer.append(raw_str, raw_str + size);
1461
532
                }
1462
112
                StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i,
1463
112
                                            res_data, res_offsets);
1464
112
            }
1465
168
        }
1466
109
        return Status::OK();
1467
109
    }
1468
1469
    // TODO: 1. use pmr::vector<char> replace fmt_buffer may speed up the code
1470
    //       2. abstract the `vector_vector` and `vector_const`
1471
    //       3. rethink we should use `DEFAULT_MAX_STRING_SIZE` to bigger here
1472
    void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1473
                      int repeat, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
1474
36
                      ColumnUInt8::Container& null_map) const {
1475
36
        size_t input_row_size = offsets.size();
1476
1477
36
        fmt::memory_buffer buffer;
1478
36
        res_offsets.resize(input_row_size);
1479
36
        null_map.resize_fill(input_row_size, 0);
1480
72
        for (ssize_t i = 0; i < input_row_size; ++i) {
1481
36
            buffer.clear();
1482
36
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1483
36
            size_t size = offsets[i] - offsets[i - 1];
1484
36
            ColumnString::check_chars_length(repeat * size + res_data.size(), 0);
1485
1486
207
            for (int j = 0; j < repeat; ++j) {
1487
171
                buffer.append(raw_str, raw_str + size);
1488
171
            }
1489
36
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1490
36
                                        res_offsets);
1491
36
        }
1492
36
    }
1493
};
1494
1495
template <typename Impl>
1496
class FunctionStringPad : public IFunction {
1497
public:
1498
    static constexpr auto name = Impl::name;
1499
1.37k
    /// Factory: allocates a FunctionStringPad for the current `Impl` via std::make_shared.
    static FunctionPtr create() {
        return std::make_shared<FunctionStringPad>();
    }
1500
2
    /// Returns the function name, which this class takes from `Impl::name`.
    String get_name() const override {
        return name;
    }
1501
1.36k
    /// The pad functions take exactly three arguments: string, target length, pad string.
    size_t get_number_of_arguments() const override {
        return 3;
    }
1502
1503
1.36k
    /// The result is always a nullable string, regardless of the argument types passed in.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        auto string_type = std::make_shared<DataTypeString>();
        return make_nullable(string_type);
    }
1506
1507
    /// Entry point for lpad/rpad.
    ///
    /// Unpacks the three arguments (string, target length, pad string), recording for each
    /// whether it is a constant column, then dispatches to the `execute_utf8` specialization
    /// matching that const-ness combination. The result stored at `result` is a nullable
    /// string column built from `res` and `null_map`. Always returns OK.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        DCHECK_GE(arguments.size(), 3);
        // One null flag per row, initially 0 (not null); execute_utf8 fills it in.
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
        auto res = ColumnString::create();

        ColumnPtr col[3];
        bool col_const[3];
        for (size_t i = 0; i < 3; ++i) {
            std::tie(col[i], col_const[i]) =
                    unpack_if_const(block.get_by_position(arguments[i]).column);
        }
        auto& null_map_data = null_map->get_data();
        auto& res_offsets = res->get_offsets();
        auto& res_chars = res->get_chars();
        res_offsets.resize(input_rows_count);

        const auto* strcol = assert_cast<const ColumnString*>(col[0].get());
        const auto& strcol_offsets = strcol->get_offsets();
        const auto& strcol_chars = strcol->get_chars();

        const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get());
        const auto& col_len_data = col_len->get_data();

        const auto* padcol = assert_cast<const ColumnString*>(col[2].get());
        const auto& padcol_offsets = padcol->get_offsets();
        const auto& padcol_chars = padcol->get_chars();
        // Turn the three runtime const flags into compile-time template arguments.
        std::visit(
                [&](auto str_const, auto len_const, auto pad_const) {
                    execute_utf8<str_const, len_const, pad_const>(
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
                },
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
                make_bool_variant(col_const[2]));

        block.get_by_position(result).column =
                ColumnNullable::create(std::move(res), std::move(null_map));
        return Status::OK();
    }
1549
1550
    template <bool str_const, bool len_const, bool pad_const>
1551
    void execute_utf8(const ColumnString::Offsets& strcol_offsets,
1552
                      const ColumnString::Chars& strcol_chars,
1553
                      const ColumnInt32::Container& col_len_data,
1554
                      const ColumnString::Offsets& padcol_offsets,
1555
                      const ColumnString::Chars& padcol_chars, ColumnString::Offsets& res_offsets,
1556
                      ColumnString::Chars& res_chars, ColumnUInt8::Container& null_map_data,
1557
870
                      size_t input_rows_count) const {
1558
870
        std::vector<size_t> pad_index;
1559
870
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
870
        if constexpr (pad_const) {
1563
372
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
372
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
372
        }
1566
1567
870
        fmt::memory_buffer buffer;
1568
870
        buffer.resize(strcol_chars.size());
1569
870
        size_t buffer_len = 0;
1570
1571
1.93k
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
1.06k
            if constexpr (!pad_const) {
1573
691
                pad_index.clear();
1574
691
            }
1575
1.06k
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
1.06k
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
548
                null_map_data[i] = true;
1579
548
                res_offsets[i] = buffer_len;
1580
548
            } else {
1581
515
                const auto str_idx = index_check_const<str_const>(i);
1582
515
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
515
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
515
                const auto pad_idx = index_check_const<pad_const>(i);
1585
515
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
515
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
515
                auto [iterate_byte_len, iterate_char_len] =
1589
515
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
515
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
515
                if (iterate_char_len == len) {
1593
471
                    buffer.resize(buffer_len + iterate_byte_len);
1594
471
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
471
                    buffer_len += iterate_byte_len;
1596
471
                    res_offsets[i] = buffer_len;
1597
471
                    continue;
1598
471
                }
1599
44
                size_t pad_char_size;
1600
44
                if constexpr (!pad_const) {
1601
32
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
32
                                                                         pad_len, pad_index);
1603
32
                } else {
1604
12
                    pad_char_size = const_pad_char_size;
1605
12
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
44
                if (pad_char_size == 0) {
1609
6
                    res_offsets[i] = buffer_len;
1610
6
                    continue;
1611
6
                }
1612
38
                const size_t str_char_size = iterate_char_len;
1613
38
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
38
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
38
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
38
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
38
                buffer.resize(buffer_len + new_capacity);
1618
38
                if constexpr (!Impl::is_lpad) {
1619
19
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
19
                    buffer_len += str_len;
1621
19
                }
1622
                // Prepend chars of pad.
1623
38
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
38
                                      pad_times);
1625
38
                buffer_len += pad_times * pad_len;
1626
1627
38
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
38
                buffer_len += pad_remainder_len;
1629
1630
38
                if constexpr (Impl::is_lpad) {
1631
19
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
19
                    buffer_len += str_len;
1633
19
                }
1634
38
                res_offsets[i] = buffer_len;
1635
38
            }
1636
1.06k
        }
1637
870
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
870
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
63
                      size_t input_rows_count) const {
1558
63
        std::vector<size_t> pad_index;
1559
63
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
63
        fmt::memory_buffer buffer;
1568
63
        buffer.resize(strcol_chars.size());
1569
63
        size_t buffer_len = 0;
1570
1571
223
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
160
            if constexpr (!pad_const) {
1573
160
                pad_index.clear();
1574
160
            }
1575
160
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
160
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
82
                null_map_data[i] = true;
1579
82
                res_offsets[i] = buffer_len;
1580
82
            } else {
1581
78
                const auto str_idx = index_check_const<str_const>(i);
1582
78
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
78
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
78
                const auto pad_idx = index_check_const<pad_const>(i);
1585
78
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
78
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
78
                auto [iterate_byte_len, iterate_char_len] =
1589
78
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
78
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
78
                if (iterate_char_len == len) {
1593
68
                    buffer.resize(buffer_len + iterate_byte_len);
1594
68
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
68
                    buffer_len += iterate_byte_len;
1596
68
                    res_offsets[i] = buffer_len;
1597
68
                    continue;
1598
68
                }
1599
10
                size_t pad_char_size;
1600
10
                if constexpr (!pad_const) {
1601
10
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
10
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
10
                if (pad_char_size == 0) {
1609
3
                    res_offsets[i] = buffer_len;
1610
3
                    continue;
1611
3
                }
1612
7
                const size_t str_char_size = iterate_char_len;
1613
7
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
7
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
7
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
7
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
7
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
7
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
7
                                      pad_times);
1625
7
                buffer_len += pad_times * pad_len;
1626
1627
7
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
7
                buffer_len += pad_remainder_len;
1629
1630
7
                if constexpr (Impl::is_lpad) {
1631
7
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
7
                    buffer_len += str_len;
1633
7
                }
1634
7
                res_offsets[i] = buffer_len;
1635
7
            }
1636
160
        }
1637
63
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
63
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
Unexecuted instantiation: _ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
63
                      size_t input_rows_count) const {
1558
63
        std::vector<size_t> pad_index;
1559
63
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
63
        fmt::memory_buffer buffer;
1568
63
        buffer.resize(strcol_chars.size());
1569
63
        size_t buffer_len = 0;
1570
1571
222
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
159
            if constexpr (!pad_const) {
1573
159
                pad_index.clear();
1574
159
            }
1575
159
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
159
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
82
                null_map_data[i] = true;
1579
82
                res_offsets[i] = buffer_len;
1580
82
            } else {
1581
77
                const auto str_idx = index_check_const<str_const>(i);
1582
77
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
77
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
77
                const auto pad_idx = index_check_const<pad_const>(i);
1585
77
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
77
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
77
                auto [iterate_byte_len, iterate_char_len] =
1589
77
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
77
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
77
                if (iterate_char_len == len) {
1593
67
                    buffer.resize(buffer_len + iterate_byte_len);
1594
67
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
67
                    buffer_len += iterate_byte_len;
1596
67
                    res_offsets[i] = buffer_len;
1597
67
                    continue;
1598
67
                }
1599
10
                size_t pad_char_size;
1600
10
                if constexpr (!pad_const) {
1601
10
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
10
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
10
                if (pad_char_size == 0) {
1609
3
                    res_offsets[i] = buffer_len;
1610
3
                    continue;
1611
3
                }
1612
7
                const size_t str_char_size = iterate_char_len;
1613
7
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
7
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
7
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
7
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
7
                buffer.resize(buffer_len + new_capacity);
1618
7
                if constexpr (!Impl::is_lpad) {
1619
7
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
7
                    buffer_len += str_len;
1621
7
                }
1622
                // Prepend chars of pad.
1623
7
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
7
                                      pad_times);
1625
7
                buffer_len += pad_times * pad_len;
1626
1627
7
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
7
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
7
                res_offsets[i] = buffer_len;
1635
7
            }
1636
159
        }
1637
63
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
63
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
Unexecuted instantiation: _ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
1639
};
1640
1641
template <typename Impl>
1642
class FunctionStringFormatRound : public IFunction {
1643
public:
1644
    static constexpr auto name = "format_round";
1645
31
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE6createEv
Line
Count
Source
1645
3
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE6createEv
Line
Count
Source
1645
3
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE6createEv
Line
Count
Source
1645
3
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE6createEv
Line
Count
Source
1645
3
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE6createEv
Line
Count
Source
1645
2
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE6createEv
Line
Count
Source
1645
13
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE6createEv
Line
Count
Source
1645
2
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE6createEv
Line
Count
Source
1645
2
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
1646
8
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
1647
1648
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
15
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
15
        return std::make_shared<DataTypeString>();
1654
15
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1648
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
1
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
1
        return std::make_shared<DataTypeString>();
1654
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1648
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
1
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
1
        return std::make_shared<DataTypeString>();
1654
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1648
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
1
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
1
        return std::make_shared<DataTypeString>();
1654
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
1648
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
1
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
1
        return std::make_shared<DataTypeString>();
1654
1
    }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
1648
11
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
11
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
11
        return std::make_shared<DataTypeString>();
1654
11
    }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
1655
8
    DataTypes get_variadic_argument_types_impl() const override {
1656
8
        return Impl::get_variadic_argument_types();
1657
8
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
1
    DataTypes get_variadic_argument_types_impl() const override {
1656
1
        return Impl::get_variadic_argument_types();
1657
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
1
    DataTypes get_variadic_argument_types_impl() const override {
1656
1
        return Impl::get_variadic_argument_types();
1657
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
1
    DataTypes get_variadic_argument_types_impl() const override {
1656
1
        return Impl::get_variadic_argument_types();
1657
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
1
    DataTypes get_variadic_argument_types_impl() const override {
1656
1
        return Impl::get_variadic_argument_types();
1657
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
1
    DataTypes get_variadic_argument_types_impl() const override {
1656
1
        return Impl::get_variadic_argument_types();
1657
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
1
    DataTypes get_variadic_argument_types_impl() const override {
1656
1
        return Impl::get_variadic_argument_types();
1657
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
1
    DataTypes get_variadic_argument_types_impl() const override {
1656
1
        return Impl::get_variadic_argument_types();
1657
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
1
    DataTypes get_variadic_argument_types_impl() const override {
1656
1
        return Impl::get_variadic_argument_types();
1657
1
    }
1658
15
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE23get_number_of_argumentsEv
Line
Count
Source
1658
1
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE23get_number_of_argumentsEv
Line
Count
Source
1658
1
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE23get_number_of_argumentsEv
Line
Count
Source
1658
1
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE23get_number_of_argumentsEv
Line
Count
Source
1658
1
    size_t get_number_of_arguments() const override { return 2; }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE23get_number_of_argumentsEv
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE23get_number_of_argumentsEv
Line
Count
Source
1658
11
    size_t get_number_of_arguments() const override { return 2; }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE23get_number_of_argumentsEv
1659
1660
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1661
12
                        uint32_t result, size_t input_rows_count) const override {
1662
12
        auto res_column = ColumnString::create();
1663
12
        ColumnPtr argument_column =
1664
12
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
12
        ColumnPtr argument_column_2;
1666
12
        bool is_const;
1667
12
        std::tie(argument_column_2, is_const) =
1668
12
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
12
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
12
        if (is_const) {
1672
2
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
2
                                                         argument_column_2, input_rows_count));
1674
10
        } else {
1675
10
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
10
                                                          argument_column_2, input_rows_count));
1677
10
        }
1678
1679
12
        block.replace_by_position(result, std::move(res_column));
1680
12
        return Status::OK();
1681
12
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
1
                        uint32_t result, size_t input_rows_count) const override {
1662
1
        auto res_column = ColumnString::create();
1663
1
        ColumnPtr argument_column =
1664
1
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
1
        ColumnPtr argument_column_2;
1666
1
        bool is_const;
1667
1
        std::tie(argument_column_2, is_const) =
1668
1
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
1
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
1
        if (is_const) {
1672
0
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
0
                                                         argument_column_2, input_rows_count));
1674
1
        } else {
1675
1
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
1
                                                          argument_column_2, input_rows_count));
1677
1
        }
1678
1679
1
        block.replace_by_position(result, std::move(res_column));
1680
1
        return Status::OK();
1681
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
1
                        uint32_t result, size_t input_rows_count) const override {
1662
1
        auto res_column = ColumnString::create();
1663
1
        ColumnPtr argument_column =
1664
1
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
1
        ColumnPtr argument_column_2;
1666
1
        bool is_const;
1667
1
        std::tie(argument_column_2, is_const) =
1668
1
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
1
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
1
        if (is_const) {
1672
0
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
0
                                                         argument_column_2, input_rows_count));
1674
1
        } else {
1675
1
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
1
                                                          argument_column_2, input_rows_count));
1677
1
        }
1678
1679
1
        block.replace_by_position(result, std::move(res_column));
1680
1
        return Status::OK();
1681
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
1
                        uint32_t result, size_t input_rows_count) const override {
1662
1
        auto res_column = ColumnString::create();
1663
1
        ColumnPtr argument_column =
1664
1
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
1
        ColumnPtr argument_column_2;
1666
1
        bool is_const;
1667
1
        std::tie(argument_column_2, is_const) =
1668
1
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
1
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
1
        if (is_const) {
1672
0
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
0
                                                         argument_column_2, input_rows_count));
1674
1
        } else {
1675
1
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
1
                                                          argument_column_2, input_rows_count));
1677
1
        }
1678
1679
1
        block.replace_by_position(result, std::move(res_column));
1680
1
        return Status::OK();
1681
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
1
                        uint32_t result, size_t input_rows_count) const override {
1662
1
        auto res_column = ColumnString::create();
1663
1
        ColumnPtr argument_column =
1664
1
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
1
        ColumnPtr argument_column_2;
1666
1
        bool is_const;
1667
1
        std::tie(argument_column_2, is_const) =
1668
1
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
1
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
1
        if (is_const) {
1672
0
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
0
                                                         argument_column_2, input_rows_count));
1674
1
        } else {
1675
1
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
1
                                                          argument_column_2, input_rows_count));
1677
1
        }
1678
1679
1
        block.replace_by_position(result, std::move(res_column));
1680
1
        return Status::OK();
1681
1
    }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
8
                        uint32_t result, size_t input_rows_count) const override {
1662
8
        auto res_column = ColumnString::create();
1663
8
        ColumnPtr argument_column =
1664
8
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
8
        ColumnPtr argument_column_2;
1666
8
        bool is_const;
1667
8
        std::tie(argument_column_2, is_const) =
1668
8
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
8
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
8
        if (is_const) {
1672
2
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
2
                                                         argument_column_2, input_rows_count));
1674
6
        } else {
1675
6
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
6
                                                          argument_column_2, input_rows_count));
1677
6
        }
1678
1679
8
        block.replace_by_position(result, std::move(res_column));
1680
8
        return Status::OK();
1681
8
    }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
1682
};
1683
1684
class FunctionSplitPart : public IFunction {
1685
public:
1686
    static constexpr auto name = "split_part";
1687
2
    static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); }
1688
1
    String get_name() const override { return name; }
1689
0
    size_t get_number_of_arguments() const override { return 3; }
1690
1691
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1692
0
        return make_nullable(std::make_shared<DataTypeString>());
1693
0
    }
1694
1695
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1696
0
                        uint32_t result, size_t input_rows_count) const override {
1697
0
        DCHECK_EQ(arguments.size(), 3);
1698
1699
0
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1700
        // Create a zero column to simply implement
1701
0
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
1702
0
        auto res = ColumnString::create();
1703
1704
0
        auto& null_map_data = null_map->get_data();
1705
0
        auto& res_offsets = res->get_offsets();
1706
0
        auto& res_chars = res->get_chars();
1707
0
        res_offsets.resize(input_rows_count);
1708
1709
0
        const size_t argument_size = arguments.size();
1710
0
        std::vector<ColumnPtr> argument_columns(argument_size);
1711
0
        for (size_t i = 0; i < argument_size; ++i) {
1712
0
            argument_columns[i] =
1713
0
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
1714
0
            if (const auto* nullable =
1715
0
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
1716
                // Danger: Here must dispose the null map data first! Because
1717
                // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
1718
                // of column nullable mem of null map
1719
0
                VectorizedUtils::update_null_map(null_map->get_data(),
1720
0
                                                 nullable->get_null_map_data());
1721
0
                argument_columns[i] = nullable->get_nested_column_ptr();
1722
0
            }
1723
0
        }
1724
1725
0
        const auto* str_col = assert_cast<const ColumnString*>(argument_columns[0].get());
1726
1727
0
        const auto* delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get());
1728
1729
0
        const auto* part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get());
1730
0
        const auto& part_num_col_data = part_num_col->get_data();
1731
1732
0
        for (size_t i = 0; i < input_rows_count; ++i) {
1733
0
            if (part_num_col_data[i] == 0) {
1734
0
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1735
0
                continue;
1736
0
            }
1737
1738
0
            auto delimiter = delimiter_col->get_data_at(i);
1739
0
            auto delimiter_str = delimiter_col->get_data_at(i).to_string();
1740
0
            auto part_number = part_num_col_data[i];
1741
0
            auto str = str_col->get_data_at(i);
1742
0
            if (delimiter.size == 0) {
1743
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
1744
0
                continue;
1745
0
            }
1746
1747
0
            if (part_number > 0) {
1748
0
                if (delimiter.size == 1) {
1749
                    // If delimiter is a char, use memchr to split
1750
0
                    int32_t pre_offset = -1;
1751
0
                    int32_t offset = -1;
1752
0
                    int32_t num = 0;
1753
0
                    while (num < part_number) {
1754
0
                        pre_offset = offset;
1755
0
                        size_t n = str.size - offset - 1;
1756
0
                        const char* pos = reinterpret_cast<const char*>(
1757
0
                                memchr(str.data + offset + 1, delimiter_str[0], n));
1758
0
                        if (pos != nullptr) {
1759
0
                            offset = pos - str.data;
1760
0
                            num++;
1761
0
                        } else {
1762
0
                            offset = str.size;
1763
0
                            num = (num == 0) ? 0 : num + 1;
1764
0
                            break;
1765
0
                        }
1766
0
                    }
1767
1768
0
                    if (num == part_number) {
1769
0
                        StringOP::push_value_string(
1770
0
                                std::string_view {
1771
0
                                        reinterpret_cast<const char*>(str.data + pre_offset + 1),
1772
0
                                        (size_t)offset - pre_offset - 1},
1773
0
                                i, res_chars, res_offsets);
1774
0
                    } else {
1775
0
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1776
0
                    }
1777
0
                } else {
1778
                    // If delimiter is a string, use memmem to split
1779
0
                    int32_t pre_offset = -delimiter.size;
1780
0
                    int32_t offset = -delimiter.size;
1781
0
                    int32_t num = 0;
1782
0
                    while (num < part_number) {
1783
0
                        pre_offset = offset;
1784
0
                        size_t n = str.size - offset - delimiter.size;
1785
0
                        char* pos =
1786
0
                                reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size,
1787
0
                                                               n, delimiter.data, delimiter.size));
1788
0
                        if (pos != nullptr) {
1789
0
                            offset = pos - str.data;
1790
0
                            num++;
1791
0
                        } else {
1792
0
                            offset = str.size;
1793
0
                            num = (num == 0) ? 0 : num + 1;
1794
0
                            break;
1795
0
                        }
1796
0
                    }
1797
1798
0
                    if (num == part_number) {
1799
0
                        StringOP::push_value_string(
1800
0
                                std::string_view {reinterpret_cast<const char*>(
1801
0
                                                          str.data + pre_offset + delimiter.size),
1802
0
                                                  (size_t)offset - pre_offset - delimiter.size},
1803
0
                                i, res_chars, res_offsets);
1804
0
                    } else {
1805
0
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1806
0
                    }
1807
0
                }
1808
0
            } else {
1809
0
                part_number = -part_number;
1810
0
                auto str_str = str.to_string();
1811
0
                int32_t offset = str.size;
1812
0
                int32_t pre_offset = offset;
1813
0
                int32_t num = 0;
1814
0
                auto substr = str_str;
1815
0
                while (num <= part_number && offset >= 0) {
1816
0
                    offset = (int)substr.rfind(delimiter, offset);
1817
0
                    if (offset != -1) {
1818
0
                        if (++num == part_number) {
1819
0
                            break;
1820
0
                        }
1821
0
                        pre_offset = offset;
1822
0
                        offset = offset - 1;
1823
0
                        substr = str_str.substr(0, pre_offset);
1824
0
                    } else {
1825
0
                        break;
1826
0
                    }
1827
0
                }
1828
0
                num = (offset == -1 && num != 0) ? num + 1 : num;
1829
1830
0
                if (num == part_number) {
1831
0
                    if (offset == -1) {
1832
0
                        StringOP::push_value_string(
1833
0
                                std::string_view {reinterpret_cast<const char*>(str.data),
1834
0
                                                  (size_t)pre_offset},
1835
0
                                i, res_chars, res_offsets);
1836
0
                    } else {
1837
0
                        StringOP::push_value_string(
1838
0
                                std::string_view {str_str.substr(
1839
0
                                        offset + delimiter.size,
1840
0
                                        (size_t)pre_offset - offset - delimiter.size)},
1841
0
                                i, res_chars, res_offsets);
1842
0
                    }
1843
0
                } else {
1844
0
                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1845
0
                }
1846
0
            }
1847
0
        }
1848
1849
0
        block.get_by_position(result).column =
1850
0
                ColumnNullable::create(std::move(res), std::move(null_map));
1851
0
        return Status::OK();
1852
0
    }
1853
};
1854
1855
class FunctionSubstringIndex : public IFunction {
1856
public:
1857
    static constexpr auto name = "substring_index";
1858
2
    static FunctionPtr create() { return std::make_shared<FunctionSubstringIndex>(); }
1859
1
    String get_name() const override { return name; }
1860
0
    size_t get_number_of_arguments() const override { return 3; }
1861
1862
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1863
0
        return std::make_shared<DataTypeString>();
1864
0
    }
1865
1866
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1867
0
                        uint32_t result, size_t input_rows_count) const override {
1868
0
        DCHECK_EQ(arguments.size(), 3);
1869
1870
        // Create a zero column to simply implement
1871
0
        auto res = ColumnString::create();
1872
1873
0
        auto& res_offsets = res->get_offsets();
1874
0
        auto& res_chars = res->get_chars();
1875
0
        res_offsets.resize(input_rows_count);
1876
0
        ColumnPtr content_column;
1877
0
        bool content_const = false;
1878
0
        std::tie(content_column, content_const) =
1879
0
                unpack_if_const(block.get_by_position(arguments[0]).column);
1880
1881
0
        const auto* str_col = assert_cast<const ColumnString*>(content_column.get());
1882
1883
        // Handle both constant and non-constant delimiter parameters
1884
0
        ColumnPtr delimiter_column_ptr;
1885
0
        bool delimiter_const = false;
1886
0
        std::tie(delimiter_column_ptr, delimiter_const) =
1887
0
                unpack_if_const(block.get_by_position(arguments[1]).column);
1888
0
        const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get());
1889
1890
0
        ColumnPtr part_num_column_ptr;
1891
0
        bool part_num_const = false;
1892
0
        std::tie(part_num_column_ptr, part_num_const) =
1893
0
                unpack_if_const(block.get_by_position(arguments[2]).column);
1894
0
        const ColumnInt32* part_num_col =
1895
0
                assert_cast<const ColumnInt32*>(part_num_column_ptr.get());
1896
1897
        // For constant multi-character delimiters, create StringRef and StringSearch only once
1898
0
        std::optional<StringRef> const_delimiter_ref;
1899
0
        std::optional<StringSearch> const_search;
1900
0
        if (delimiter_const && delimiter_col->get_data_at(0).size > 1) {
1901
0
            const_delimiter_ref.emplace(delimiter_col->get_data_at(0));
1902
0
            const_search.emplace(&const_delimiter_ref.value());
1903
0
        }
1904
1905
0
        for (size_t i = 0; i < input_rows_count; ++i) {
1906
0
            auto str = str_col->get_data_at(content_const ? 0 : i);
1907
0
            auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i);
1908
0
            int32_t delimiter_size = delimiter.size;
1909
1910
0
            auto part_number = part_num_col->get_element(part_num_const ? 0 : i);
1911
1912
0
            if (part_number == 0 || delimiter_size == 0) {
1913
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
1914
0
                continue;
1915
0
            }
1916
1917
0
            if (part_number > 0) {
1918
0
                if (delimiter_size == 1) {
1919
0
                    int32_t offset = -1;
1920
0
                    int32_t num = 0;
1921
0
                    while (num < part_number) {
1922
0
                        size_t n = str.size - offset - 1;
1923
0
                        const char* pos = reinterpret_cast<const char*>(
1924
0
                                memchr(str.data + offset + 1, delimiter.data[0], n));
1925
0
                        if (pos != nullptr) {
1926
0
                            offset = pos - str.data;
1927
0
                            num++;
1928
0
                        } else {
1929
0
                            offset = str.size;
1930
0
                            num = (num == 0) ? 0 : num + 1;
1931
0
                            break;
1932
0
                        }
1933
0
                    }
1934
1935
0
                    if (num == part_number) {
1936
0
                        StringOP::push_value_string(
1937
0
                                std::string_view {reinterpret_cast<const char*>(str.data),
1938
0
                                                  (size_t)offset},
1939
0
                                i, res_chars, res_offsets);
1940
0
                    } else {
1941
0
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
1942
0
                                                    res_chars, res_offsets);
1943
0
                    }
1944
0
                } else {
1945
                    // For multi-character delimiters
1946
                    // Use pre-created StringRef and StringSearch for constant delimiters
1947
0
                    StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value()
1948
0
                                                                  : StringRef(delimiter);
1949
0
                    const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr;
1950
0
                    StringSearch local_search(&delimiter_ref);
1951
0
                    if (!search_ptr) {
1952
0
                        search_ptr = &local_search;
1953
0
                    }
1954
1955
0
                    int32_t offset = -delimiter_size;
1956
0
                    int32_t num = 0;
1957
0
                    while (num < part_number) {
1958
0
                        size_t n = str.size - offset - delimiter_size;
1959
                        // search first match delimter_ref index from src string among str_offset to end
1960
0
                        const char* pos = search_ptr->search(str.data + offset + delimiter_size, n);
1961
0
                        if (pos < str.data + str.size) {
1962
0
                            offset = pos - str.data;
1963
0
                            num++;
1964
0
                        } else {
1965
0
                            offset = str.size;
1966
0
                            num = (num == 0) ? 0 : num + 1;
1967
0
                            break;
1968
0
                        }
1969
0
                    }
1970
1971
0
                    if (num == part_number) {
1972
0
                        StringOP::push_value_string(
1973
0
                                std::string_view {reinterpret_cast<const char*>(str.data),
1974
0
                                                  (size_t)offset},
1975
0
                                i, res_chars, res_offsets);
1976
0
                    } else {
1977
0
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
1978
0
                                                    res_chars, res_offsets);
1979
0
                    }
1980
0
                }
1981
0
            } else {
1982
0
                int neg_part_number = -part_number;
1983
0
                auto str_str = str.to_string();
1984
0
                int32_t offset = str.size;
1985
0
                int32_t pre_offset = offset;
1986
0
                int32_t num = 0;
1987
0
                auto substr = str_str;
1988
1989
                // Use pre-created StringRef for constant delimiters
1990
0
                StringRef delimiter_str =
1991
0
                        const_delimiter_ref
1992
0
                                ? const_delimiter_ref.value()
1993
0
                                : StringRef(reinterpret_cast<const char*>(delimiter.data),
1994
0
                                            delimiter.size);
1995
1996
0
                while (num <= neg_part_number && offset >= 0) {
1997
0
                    offset = (int)substr.rfind(delimiter_str, offset);
1998
0
                    if (offset != -1) {
1999
0
                        if (++num == neg_part_number) {
2000
0
                            break;
2001
0
                        }
2002
0
                        pre_offset = offset;
2003
0
                        offset = offset - 1;
2004
0
                        substr = str_str.substr(0, pre_offset);
2005
0
                    } else {
2006
0
                        break;
2007
0
                    }
2008
0
                }
2009
0
                num = (offset == -1 && num != 0) ? num + 1 : num;
2010
2011
0
                if (num == neg_part_number) {
2012
0
                    if (offset == -1) {
2013
0
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
2014
0
                                                    res_chars, res_offsets);
2015
0
                    } else {
2016
0
                        StringOP::push_value_string(
2017
0
                                std::string_view {str.data + offset + delimiter_size,
2018
0
                                                  str.size - offset - delimiter_size},
2019
0
                                i, res_chars, res_offsets);
2020
0
                    }
2021
0
                } else {
2022
0
                    StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars,
2023
0
                                                res_offsets);
2024
0
                }
2025
0
            }
2026
0
        }
2027
2028
0
        block.get_by_position(result).column = std::move(res);
2029
0
        return Status::OK();
2030
0
    }
2031
};
2032
2033
class SplitByStringExecutor {
2034
public:
2035
    using NullMapType = PaddedPODArray<UInt8>;
2036
2037
    static Status execute_core(Block& block, const ColumnNumbers& arguments, uint32_t result,
2038
15
                               size_t input_rows_count, Int32 limit_value) {
2039
15
        const auto& [src_column, left_const] =
2040
15
                unpack_if_const(block.get_by_position(arguments[0]).column);
2041
15
        const auto& [right_column, right_const] =
2042
15
                unpack_if_const(block.get_by_position(arguments[1]).column);
2043
2044
15
        DataTypePtr src_column_type = block.get_by_position(arguments[0]).type;
2045
15
        auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(),
2046
15
                                                   ColumnArray::ColumnOffsets::create());
2047
2048
15
        dest_column_ptr->resize(0);
2049
15
        auto& dest_offsets = dest_column_ptr->get_offsets();
2050
2051
15
        auto& dest_nullable_col = assert_cast<ColumnNullable&>(dest_column_ptr->get_data());
2052
15
        auto* dest_nested_column = dest_nullable_col.get_nested_column_ptr().get();
2053
2054
15
        const auto* col_str = assert_cast<const ColumnString*>(src_column.get());
2055
15
        const auto* col_delimiter = assert_cast<const ColumnString*>(right_column.get());
2056
2057
15
        std::visit(
2058
15
                [&](auto src_const, auto delimiter_const) {
2059
15
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
2060
15
                                                         *dest_nested_column, dest_offsets,
2061
15
                                                         input_rows_count, limit_value);
2062
15
                },
_ZZN5doris21SplitByStringExecutor12execute_coreERNS_5BlockERKSt6vectorIjSaIjEEjmiENKUlT_T0_E_clISt17integral_constantIbLb0EESD_EEDaS8_S9_
Line
Count
Source
2058
15
                [&](auto src_const, auto delimiter_const) {
2059
15
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
2060
15
                                                         *dest_nested_column, dest_offsets,
2061
15
                                                         input_rows_count, limit_value);
2062
15
                },
Unexecuted instantiation: _ZZN5doris21SplitByStringExecutor12execute_coreERNS_5BlockERKSt6vectorIjSaIjEEjmiENKUlT_T0_E_clISt17integral_constantIbLb1EESD_EEDaS8_S9_
Unexecuted instantiation: _ZZN5doris21SplitByStringExecutor12execute_coreERNS_5BlockERKSt6vectorIjSaIjEEjmiENKUlT_T0_E_clISt17integral_constantIbLb1EESC_IbLb0EEEEDaS8_S9_
Unexecuted instantiation: _ZZN5doris21SplitByStringExecutor12execute_coreERNS_5BlockERKSt6vectorIjSaIjEEjmiENKUlT_T0_E_clISt17integral_constantIbLb0EESC_IbLb1EEEEDaS8_S9_
2063
15
                make_bool_variant(left_const), make_bool_variant(right_const));
2064
2065
        // all elements in dest_nested_column are not null
2066
15
        dest_nullable_col.get_null_map_column().get_data().resize_fill(dest_nested_column->size(),
2067
15
                                                                       false);
2068
15
        block.replace_by_position(result, std::move(dest_column_ptr));
2069
2070
15
        return Status::OK();
2071
15
    }
2072
2073
private:
2074
    template <bool src_const, bool delimiter_const>
2075
    static void _execute(const ColumnString& src_column_string,
2076
                         const ColumnString& delimiter_column, IColumn& dest_nested_column,
2077
15
                         ColumnArray::Offsets64& dest_offsets, size_t size, Int32 limit_value) {
2078
15
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
2079
15
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
2080
15
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
2081
15
        column_string_chars.reserve(0);
2082
2083
15
        ColumnArray::Offset64 string_pos = 0;
2084
15
        ColumnArray::Offset64 dest_pos = 0;
2085
2086
15
        StringSearch search;
2087
15
        StringRef delimiter_ref_for_search;
2088
2089
15
        if constexpr (delimiter_const) {
2090
0
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
2091
0
            search.set_pattern(&delimiter_ref_for_search);
2092
0
        }
2093
2094
30
        for (size_t i = 0; i < size; i++) {
2095
15
            const StringRef str_ref =
2096
15
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2097
15
            const StringRef delimiter_ref =
2098
15
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
2099
2100
15
            if (str_ref.size == 0) {
2101
1
                dest_offsets.push_back(dest_pos);
2102
1
                continue;
2103
1
            }
2104
14
            if (delimiter_ref.size == 0) {
2105
5
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
2106
5
                                      string_pos, dest_pos, limit_value);
2107
9
            } else {
2108
9
                if constexpr (!delimiter_const) {
2109
9
                    search.set_pattern(&delimiter_ref);
2110
9
                }
2111
9
                Int32 split_count = 0;
2112
26
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2113
                    // If limit reached, dump remainder as final token
2114
22
                    if (limit_value > 0 && split_count == limit_value - 1) {
2115
5
                        const size_t remaining = str_ref.size - str_pos;
2116
5
                        const size_t old_size = column_string_chars.size();
2117
5
                        if (remaining > 0) {
2118
5
                            const size_t new_size = old_size + remaining;
2119
5
                            column_string_chars.resize(new_size);
2120
5
                            memcpy_small_allow_read_write_overflow15(
2121
5
                                    column_string_chars.data() + old_size, str_ref.data + str_pos,
2122
5
                                    remaining);
2123
5
                            string_pos += remaining;
2124
5
                        }
2125
5
                        column_string_offsets.push_back(string_pos);
2126
5
                        dest_pos++;
2127
5
                        break;
2128
5
                    }
2129
17
                    const size_t str_offset = str_pos;
2130
17
                    const size_t old_size = column_string_chars.size();
2131
                    // search first match delimter_ref index from src string among str_offset to end
2132
17
                    const char* result_start =
2133
17
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
2134
                    // compute split part size
2135
17
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
2136
                    // save dist string split part
2137
17
                    if (split_part_size > 0) {
2138
16
                        const size_t new_size = old_size + split_part_size;
2139
16
                        column_string_chars.resize(new_size);
2140
16
                        memcpy_small_allow_read_write_overflow15(
2141
16
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
2142
16
                                split_part_size);
2143
                        // add dist string offset
2144
16
                        string_pos += split_part_size;
2145
16
                    }
2146
17
                    column_string_offsets.push_back(string_pos);
2147
                    // array offset + 1
2148
17
                    dest_pos++;
2149
17
                    split_count++;
2150
                    // add src string str_pos to next search start
2151
17
                    str_pos += split_part_size + delimiter_ref.size;
2152
17
                }
2153
9
            }
2154
14
            dest_offsets.push_back(dest_pos);
2155
14
        }
2156
15
    }
_ZN5doris21SplitByStringExecutor8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEmi
Line
Count
Source
2077
15
                         ColumnArray::Offsets64& dest_offsets, size_t size, Int32 limit_value) {
2078
15
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
2079
15
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
2080
15
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
2081
15
        column_string_chars.reserve(0);
2082
2083
15
        ColumnArray::Offset64 string_pos = 0;
2084
15
        ColumnArray::Offset64 dest_pos = 0;
2085
2086
15
        StringSearch search;
2087
15
        StringRef delimiter_ref_for_search;
2088
2089
        if constexpr (delimiter_const) {
2090
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
2091
            search.set_pattern(&delimiter_ref_for_search);
2092
        }
2093
2094
30
        for (size_t i = 0; i < size; i++) {
2095
15
            const StringRef str_ref =
2096
15
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2097
15
            const StringRef delimiter_ref =
2098
15
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
2099
2100
15
            if (str_ref.size == 0) {
2101
1
                dest_offsets.push_back(dest_pos);
2102
1
                continue;
2103
1
            }
2104
14
            if (delimiter_ref.size == 0) {
2105
5
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
2106
5
                                      string_pos, dest_pos, limit_value);
2107
9
            } else {
2108
9
                if constexpr (!delimiter_const) {
2109
9
                    search.set_pattern(&delimiter_ref);
2110
9
                }
2111
9
                Int32 split_count = 0;
2112
26
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2113
                    // If limit reached, dump remainder as final token
2114
22
                    if (limit_value > 0 && split_count == limit_value - 1) {
2115
5
                        const size_t remaining = str_ref.size - str_pos;
2116
5
                        const size_t old_size = column_string_chars.size();
2117
5
                        if (remaining > 0) {
2118
5
                            const size_t new_size = old_size + remaining;
2119
5
                            column_string_chars.resize(new_size);
2120
5
                            memcpy_small_allow_read_write_overflow15(
2121
5
                                    column_string_chars.data() + old_size, str_ref.data + str_pos,
2122
5
                                    remaining);
2123
5
                            string_pos += remaining;
2124
5
                        }
2125
5
                        column_string_offsets.push_back(string_pos);
2126
5
                        dest_pos++;
2127
5
                        break;
2128
5
                    }
2129
17
                    const size_t str_offset = str_pos;
2130
17
                    const size_t old_size = column_string_chars.size();
2131
                    // search first match delimter_ref index from src string among str_offset to end
2132
17
                    const char* result_start =
2133
17
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
2134
                    // compute split part size
2135
17
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
2136
                    // save dist string split part
2137
17
                    if (split_part_size > 0) {
2138
16
                        const size_t new_size = old_size + split_part_size;
2139
16
                        column_string_chars.resize(new_size);
2140
16
                        memcpy_small_allow_read_write_overflow15(
2141
16
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
2142
16
                                split_part_size);
2143
                        // add dist string offset
2144
16
                        string_pos += split_part_size;
2145
16
                    }
2146
17
                    column_string_offsets.push_back(string_pos);
2147
                    // array offset + 1
2148
17
                    dest_pos++;
2149
17
                    split_count++;
2150
                    // add src string str_pos to next search start
2151
17
                    str_pos += split_part_size + delimiter_ref.size;
2152
17
                }
2153
9
            }
2154
14
            dest_offsets.push_back(dest_pos);
2155
14
        }
2156
15
    }
Unexecuted instantiation: _ZN5doris21SplitByStringExecutor8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEmi
Unexecuted instantiation: _ZN5doris21SplitByStringExecutor8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEmi
Unexecuted instantiation: _ZN5doris21SplitByStringExecutor8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEmi
2157
2158
    static void split_empty_delimiter(const StringRef& str_ref,
2159
                                      ColumnString::Chars& column_string_chars,
2160
                                      ColumnString::Offsets& column_string_offsets,
2161
                                      ColumnArray::Offset64& string_pos,
2162
5
                                      ColumnArray::Offset64& dest_pos, Int32 limit_value) {
2163
5
        const size_t old_size = column_string_chars.size();
2164
5
        const size_t new_size = old_size + str_ref.size;
2165
5
        column_string_chars.resize(new_size);
2166
5
        memcpy(column_string_chars.data() + old_size, str_ref.data, str_ref.size);
2167
2168
5
        if (limit_value > 0) {
2169
            // With limit: split character by character up to limit-1, then remainder
2170
4
            Int32 split_count = 0;
2171
4
            size_t i = 0;
2172
4
            if (simd::VStringFunctions::is_ascii(str_ref)) {
2173
10
                for (; i < str_ref.size; i++) {
2174
9
                    if (split_count == limit_value - 1) {
2175
                        // remainder
2176
2
                        string_pos += str_ref.size - i;
2177
2
                        column_string_offsets.push_back(string_pos);
2178
2
                        dest_pos++;
2179
2
                        return;
2180
2
                    }
2181
7
                    string_pos++;
2182
7
                    column_string_offsets.push_back(string_pos);
2183
7
                    dest_pos++;
2184
7
                    split_count++;
2185
7
                }
2186
3
            } else {
2187
2
                for (size_t utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) {
2188
2
                    utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]];
2189
2
                    if (split_count == limit_value - 1) {
2190
                        // remainder
2191
1
                        string_pos += str_ref.size - i;
2192
1
                        column_string_offsets.push_back(string_pos);
2193
1
                        dest_pos++;
2194
1
                        return;
2195
1
                    }
2196
1
                    string_pos += utf8_char_len;
2197
1
                    column_string_offsets.push_back(string_pos);
2198
1
                    dest_pos++;
2199
1
                    split_count++;
2200
1
                }
2201
1
            }
2202
4
        } else {
2203
            // No limit: original behavior
2204
1
            if (simd::VStringFunctions::is_ascii(str_ref)) {
2205
1
                const auto size = str_ref.size;
2206
2207
1
                const auto nested_old_size = column_string_offsets.size();
2208
1
                const auto nested_new_size = nested_old_size + size;
2209
1
                column_string_offsets.resize(nested_new_size);
2210
1
                std::iota(column_string_offsets.data() + nested_old_size,
2211
1
                          column_string_offsets.data() + nested_new_size, string_pos + 1);
2212
2213
1
                string_pos += size;
2214
1
                dest_pos += size;
2215
1
            } else {
2216
0
                for (size_t i = 0, utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) {
2217
0
                    utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]];
2218
2219
0
                    string_pos += utf8_char_len;
2220
0
                    column_string_offsets.push_back(string_pos);
2221
0
                    dest_pos++;
2222
0
                }
2223
0
            }
2224
1
        }
2225
5
    }
2226
};
2227
2228
struct SplitByStringTwoArgImpl {
2229
1
    static DataTypes get_variadic_argument_types() {
2230
1
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
2231
1
    }
2232
2233
    static Status execute_impl(FunctionContext* /*context*/, Block& block,
2234
                               const ColumnNumbers& arguments, uint32_t result,
2235
2
                               size_t input_rows_count) {
2236
2
        DCHECK_EQ(arguments.size(), 2);
2237
2
        return SplitByStringExecutor::execute_core(block, arguments, result, input_rows_count, -1);
2238
2
    }
2239
};
2240
2241
struct SplitByStringThreeArgImpl {
2242
1
    static DataTypes get_variadic_argument_types() {
2243
1
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
2244
1
                std::make_shared<DataTypeInt32>()};
2245
1
    }
2246
2247
    static Status execute_impl(FunctionContext* /*context*/, Block& block,
2248
                               const ColumnNumbers& arguments, uint32_t result,
2249
13
                               size_t input_rows_count) {
2250
13
        DCHECK_EQ(arguments.size(), 3);
2251
13
        const auto& [limit_column, limit_is_const] =
2252
13
                unpack_if_const(block.get_by_position(arguments[2]).column);
2253
13
        auto limit_value = assert_cast<const ColumnInt32&>(*limit_column).get_element(0);
2254
13
        return SplitByStringExecutor::execute_core(block, arguments, result, input_rows_count,
2255
13
                                                   limit_value);
2256
13
    }
2257
};
2258
2259
template <typename Impl>
2260
class FunctionSplitByString : public IFunction {
2261
public:
2262
    static constexpr auto name = "split_by_string";
2263
2264
19
    static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); }
_ZN5doris21FunctionSplitByStringINS_23SplitByStringTwoArgImplEE6createEv
Line
Count
Source
2264
4
    static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); }
_ZN5doris21FunctionSplitByStringINS_25SplitByStringThreeArgImplEE6createEv
Line
Count
Source
2264
15
    static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); }
2265
2266
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris21FunctionSplitByStringINS_23SplitByStringTwoArgImplEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris21FunctionSplitByStringINS_25SplitByStringThreeArgImplEE8get_nameB5cxx11Ev
2267
2268
17
    bool is_variadic() const override { return true; }
_ZNK5doris21FunctionSplitByStringINS_23SplitByStringTwoArgImplEE11is_variadicEv
Line
Count
Source
2268
3
    bool is_variadic() const override { return true; }
_ZNK5doris21FunctionSplitByStringINS_25SplitByStringThreeArgImplEE11is_variadicEv
Line
Count
Source
2268
14
    bool is_variadic() const override { return true; }
2269
2270
0
    size_t get_number_of_arguments() const override {
2271
0
        return get_variadic_argument_types_impl().size();
2272
0
    }
Unexecuted instantiation: _ZNK5doris21FunctionSplitByStringINS_23SplitByStringTwoArgImplEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris21FunctionSplitByStringINS_25SplitByStringThreeArgImplEE23get_number_of_argumentsEv
2273
2274
2
    DataTypes get_variadic_argument_types_impl() const override {
2275
2
        return Impl::get_variadic_argument_types();
2276
2
    }
_ZNK5doris21FunctionSplitByStringINS_23SplitByStringTwoArgImplEE32get_variadic_argument_types_implEv
Line
Count
Source
2274
1
    DataTypes get_variadic_argument_types_impl() const override {
2275
1
        return Impl::get_variadic_argument_types();
2276
1
    }
_ZNK5doris21FunctionSplitByStringINS_25SplitByStringThreeArgImplEE32get_variadic_argument_types_implEv
Line
Count
Source
2274
1
    DataTypes get_variadic_argument_types_impl() const override {
2275
1
        return Impl::get_variadic_argument_types();
2276
1
    }
2277
2278
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2279
15
        DCHECK(is_string_type(arguments[0]->get_primitive_type()))
2280
0
                << "first argument for function: " << name << " should be string"
2281
0
                << " and arguments[0] is " << arguments[0]->get_name();
2282
15
        DCHECK(is_string_type(arguments[1]->get_primitive_type()))
2283
0
                << "second argument for function: " << name << " should be string"
2284
0
                << " and arguments[1] is " << arguments[1]->get_name();
2285
15
        return std::make_shared<DataTypeArray>(make_nullable(arguments[0]));
2286
15
    }
_ZNK5doris21FunctionSplitByStringINS_23SplitByStringTwoArgImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2278
2
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2279
2
        DCHECK(is_string_type(arguments[0]->get_primitive_type()))
2280
0
                << "first argument for function: " << name << " should be string"
2281
0
                << " and arguments[0] is " << arguments[0]->get_name();
2282
2
        DCHECK(is_string_type(arguments[1]->get_primitive_type()))
2283
0
                << "second argument for function: " << name << " should be string"
2284
0
                << " and arguments[1] is " << arguments[1]->get_name();
2285
2
        return std::make_shared<DataTypeArray>(make_nullable(arguments[0]));
2286
2
    }
_ZNK5doris21FunctionSplitByStringINS_25SplitByStringThreeArgImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2278
13
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2279
13
        DCHECK(is_string_type(arguments[0]->get_primitive_type()))
2280
0
                << "first argument for function: " << name << " should be string"
2281
0
                << " and arguments[0] is " << arguments[0]->get_name();
2282
13
        DCHECK(is_string_type(arguments[1]->get_primitive_type()))
2283
0
                << "second argument for function: " << name << " should be string"
2284
0
                << " and arguments[1] is " << arguments[1]->get_name();
2285
13
        return std::make_shared<DataTypeArray>(make_nullable(arguments[0]));
2286
13
    }
2287
2288
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2289
15
                        uint32_t result, size_t input_rows_count) const override {
2290
15
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
2291
15
    }
_ZNK5doris21FunctionSplitByStringINS_23SplitByStringTwoArgImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2289
2
                        uint32_t result, size_t input_rows_count) const override {
2290
2
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
2291
2
    }
_ZNK5doris21FunctionSplitByStringINS_25SplitByStringThreeArgImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2289
13
                        uint32_t result, size_t input_rows_count) const override {
2290
13
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
2291
13
    }
2292
};
2293
2294
enum class FunctionCountSubStringType { TWO_ARGUMENTS, THREE_ARGUMENTS };
2295
2296
template <FunctionCountSubStringType type>
2297
class FunctionCountSubString : public IFunction {
2298
public:
2299
    static constexpr auto name = "count_substrings";
2300
    static constexpr auto arg_count = (type == FunctionCountSubStringType::TWO_ARGUMENTS) ? 2 : 3;
2301
2302
223
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE6createEv
Line
Count
Source
2302
45
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE6createEv
Line
Count
Source
2302
178
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
2303
    using NullMapType = PaddedPODArray<UInt8>;
2304
2305
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8get_nameB5cxx11Ev
2306
2307
0
    size_t get_number_of_arguments() const override { return arg_count; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE23get_number_of_argumentsEv
2308
2309
219
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2310
219
        return std::make_shared<DataTypeInt32>();
2311
219
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2309
43
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2310
43
        return std::make_shared<DataTypeInt32>();
2311
43
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2309
176
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2310
176
        return std::make_shared<DataTypeInt32>();
2311
176
    }
2312
2313
2
    DataTypes get_variadic_argument_types_impl() const override {
2314
2
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2315
1
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
2316
1
        } else {
2317
1
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
2318
1
                    std::make_shared<DataTypeInt32>()};
2319
1
        }
2320
2
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE32get_variadic_argument_types_implEv
Line
Count
Source
2313
1
    DataTypes get_variadic_argument_types_impl() const override {
2314
1
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2315
1
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
2316
        } else {
2317
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
2318
                    std::make_shared<DataTypeInt32>()};
2319
        }
2320
1
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE32get_variadic_argument_types_implEv
Line
Count
Source
2313
1
    DataTypes get_variadic_argument_types_impl() const override {
2314
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2315
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
2316
1
        } else {
2317
1
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
2318
1
                    std::make_shared<DataTypeInt32>()};
2319
1
        }
2320
1
    }
2321
2322
221
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE11is_variadicEv
Line
Count
Source
2322
44
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE11is_variadicEv
Line
Count
Source
2322
177
    bool is_variadic() const override { return true; }
2323
2324
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
2325
189
                        uint32_t result, size_t input_rows_count) const override {
2326
189
        DCHECK(arg_count);
2327
189
        bool col_const[arg_count];
2328
189
        ColumnPtr argument_columns[arg_count];
2329
722
        for (int i = 0; i < arg_count; ++i) {
2330
533
            std::tie(argument_columns[i], col_const[i]) =
2331
533
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2332
533
        }
2333
2334
189
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
2335
189
        auto& dest_column_data = dest_column_ptr->get_data();
2336
2337
189
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2338
34
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2339
34
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2340
34
            std::visit(
2341
34
                    [&](auto str_const, auto pattern_const) {
2342
34
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2343
34
                                                           dest_column_data, input_rows_count);
2344
34
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESH_EEDaSC_SD_
Line
Count
Source
2341
12
                    [&](auto str_const, auto pattern_const) {
2342
12
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2343
12
                                                           dest_column_data, input_rows_count);
2344
12
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESG_IbLb1EEEEDaSC_SD_
Line
Count
Source
2341
11
                    [&](auto str_const, auto pattern_const) {
2342
11
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2343
11
                                                           dest_column_data, input_rows_count);
2344
11
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESG_IbLb0EEEEDaSC_SD_
Line
Count
Source
2341
11
                    [&](auto str_const, auto pattern_const) {
2342
11
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2343
11
                                                           dest_column_data, input_rows_count);
2344
11
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESH_EEDaSC_SD_
2345
34
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
2346
155
        } else {
2347
155
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2348
155
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2349
155
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
2350
155
            std::visit(
2351
155
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2352
155
                        _execute<str_const, pattern_const, start_pos_const>(
2353
155
                                src_column_string, pattern_column, start_pos_column,
2354
155
                                dest_column_data, input_rows_count);
2355
155
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
2351
23
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2352
23
                        _execute<str_const, pattern_const, start_pos_const>(
2353
23
                                src_column_string, pattern_column, start_pos_column,
2354
23
                                dest_column_data, input_rows_count);
2355
23
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
2351
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2352
22
                        _execute<str_const, pattern_const, start_pos_const>(
2353
22
                                src_column_string, pattern_column, start_pos_column,
2354
22
                                dest_column_data, input_rows_count);
2355
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
2351
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2352
22
                        _execute<str_const, pattern_const, start_pos_const>(
2353
22
                                src_column_string, pattern_column, start_pos_column,
2354
22
                                dest_column_data, input_rows_count);
2355
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
2351
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2352
22
                        _execute<str_const, pattern_const, start_pos_const>(
2353
22
                                src_column_string, pattern_column, start_pos_column,
2354
22
                                dest_column_data, input_rows_count);
2355
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
2351
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2352
22
                        _execute<str_const, pattern_const, start_pos_const>(
2353
22
                                src_column_string, pattern_column, start_pos_column,
2354
22
                                dest_column_data, input_rows_count);
2355
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
2351
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2352
22
                        _execute<str_const, pattern_const, start_pos_const>(
2353
22
                                src_column_string, pattern_column, start_pos_column,
2354
22
                                dest_column_data, input_rows_count);
2355
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
2351
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2352
22
                        _execute<str_const, pattern_const, start_pos_const>(
2353
22
                                src_column_string, pattern_column, start_pos_column,
2354
22
                                dest_column_data, input_rows_count);
2355
22
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
2356
155
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
2357
155
                    make_bool_variant(col_const[2]));
2358
155
        }
2359
2360
189
        block.replace_by_position(result, std::move(dest_column_ptr));
2361
189
        return Status::OK();
2362
189
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2325
34
                        uint32_t result, size_t input_rows_count) const override {
2326
34
        DCHECK(arg_count);
2327
34
        bool col_const[arg_count];
2328
34
        ColumnPtr argument_columns[arg_count];
2329
102
        for (int i = 0; i < arg_count; ++i) {
2330
68
            std::tie(argument_columns[i], col_const[i]) =
2331
68
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2332
68
        }
2333
2334
34
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
2335
34
        auto& dest_column_data = dest_column_ptr->get_data();
2336
2337
34
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2338
34
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2339
34
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2340
34
            std::visit(
2341
34
                    [&](auto str_const, auto pattern_const) {
2342
34
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2343
34
                                                           dest_column_data, input_rows_count);
2344
34
                    },
2345
34
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
2346
        } else {
2347
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2348
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2349
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
2350
            std::visit(
2351
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2352
                        _execute<str_const, pattern_const, start_pos_const>(
2353
                                src_column_string, pattern_column, start_pos_column,
2354
                                dest_column_data, input_rows_count);
2355
                    },
2356
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
2357
                    make_bool_variant(col_const[2]));
2358
        }
2359
2360
34
        block.replace_by_position(result, std::move(dest_column_ptr));
2361
34
        return Status::OK();
2362
34
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2325
155
                        uint32_t result, size_t input_rows_count) const override {
2326
155
        DCHECK(arg_count);
2327
155
        bool col_const[arg_count];
2328
155
        ColumnPtr argument_columns[arg_count];
2329
620
        for (int i = 0; i < arg_count; ++i) {
2330
465
            std::tie(argument_columns[i], col_const[i]) =
2331
465
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2332
465
        }
2333
2334
155
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
2335
155
        auto& dest_column_data = dest_column_ptr->get_data();
2336
2337
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2338
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2339
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2340
            std::visit(
2341
                    [&](auto str_const, auto pattern_const) {
2342
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2343
                                                           dest_column_data, input_rows_count);
2344
                    },
2345
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
2346
155
        } else {
2347
155
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2348
155
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2349
155
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
2350
155
            std::visit(
2351
155
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2352
155
                        _execute<str_const, pattern_const, start_pos_const>(
2353
155
                                src_column_string, pattern_column, start_pos_column,
2354
155
                                dest_column_data, input_rows_count);
2355
155
                    },
2356
155
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
2357
155
                    make_bool_variant(col_const[2]));
2358
155
        }
2359
2360
155
        block.replace_by_position(result, std::move(dest_column_ptr));
2361
155
        return Status::OK();
2362
155
    }
2363
2364
private:
2365
    template <bool src_const, bool pattern_const>
2366
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
2367
34
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2368
81
        for (size_t i = 0; i < size; i++) {
2369
47
            const StringRef str_ref =
2370
47
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2371
2372
47
            const StringRef pattern_ref =
2373
47
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2374
47
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2375
47
        }
2376
34
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
Line
Count
Source
2367
12
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2368
37
        for (size_t i = 0; i < size; i++) {
2369
25
            const StringRef str_ref =
2370
25
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2371
2372
25
            const StringRef pattern_ref =
2373
25
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2374
25
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2375
25
        }
2376
12
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
Line
Count
Source
2367
11
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2368
22
        for (size_t i = 0; i < size; i++) {
2369
11
            const StringRef str_ref =
2370
11
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2371
2372
11
            const StringRef pattern_ref =
2373
11
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2374
11
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2375
11
        }
2376
11
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
Line
Count
Source
2367
11
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2368
22
        for (size_t i = 0; i < size; i++) {
2369
11
            const StringRef str_ref =
2370
11
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2371
2372
11
            const StringRef pattern_ref =
2373
11
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2374
11
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2375
11
        }
2376
11
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
2377
2378
    template <bool src_const, bool pattern_const, bool start_pos_const>
2379
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
2380
                  const ColumnInt32& start_pos_column, ColumnInt32::Container& dest_column_data,
2381
155
                  size_t size) const {
2382
334
        for (size_t i = 0; i < size; i++) {
2383
179
            const StringRef str_ref =
2384
179
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2385
179
            const StringRef pattern_ref =
2386
179
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2387
            // 1-based index
2388
179
            int32_t start_pos =
2389
179
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2390
2391
179
            const char* p = str_ref.begin();
2392
179
            const char* end = str_ref.end();
2393
179
            int char_size = 0;
2394
1.22k
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2395
1.04k
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2396
1.04k
            }
2397
179
            const auto start_byte_len = p - str_ref.begin();
2398
2399
179
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2400
115
                dest_column_data[i] = 0;
2401
115
            } else {
2402
64
                dest_column_data[i] =
2403
64
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2404
64
            }
2405
179
        }
2406
155
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
Line
Count
Source
2381
23
                  size_t size) const {
2382
70
        for (size_t i = 0; i < size; i++) {
2383
47
            const StringRef str_ref =
2384
47
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2385
47
            const StringRef pattern_ref =
2386
47
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2387
            // 1-based index
2388
47
            int32_t start_pos =
2389
47
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2390
2391
47
            const char* p = str_ref.begin();
2392
47
            const char* end = str_ref.end();
2393
47
            int char_size = 0;
2394
316
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2395
269
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2396
269
            }
2397
47
            const auto start_byte_len = p - str_ref.begin();
2398
2399
47
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2400
31
                dest_column_data[i] = 0;
2401
31
            } else {
2402
16
                dest_column_data[i] =
2403
16
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2404
16
            }
2405
47
        }
2406
23
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
Line
Count
Source
2381
22
                  size_t size) const {
2382
44
        for (size_t i = 0; i < size; i++) {
2383
22
            const StringRef str_ref =
2384
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2385
22
            const StringRef pattern_ref =
2386
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2387
            // 1-based index
2388
22
            int32_t start_pos =
2389
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2390
2391
22
            const char* p = str_ref.begin();
2392
22
            const char* end = str_ref.end();
2393
22
            int char_size = 0;
2394
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2395
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2396
129
            }
2397
22
            const auto start_byte_len = p - str_ref.begin();
2398
2399
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2400
14
                dest_column_data[i] = 0;
2401
14
            } else {
2402
8
                dest_column_data[i] =
2403
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2404
8
            }
2405
22
        }
2406
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
Line
Count
Source
2381
22
                  size_t size) const {
2382
44
        for (size_t i = 0; i < size; i++) {
2383
22
            const StringRef str_ref =
2384
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2385
22
            const StringRef pattern_ref =
2386
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2387
            // 1-based index
2388
22
            int32_t start_pos =
2389
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2390
2391
22
            const char* p = str_ref.begin();
2392
22
            const char* end = str_ref.end();
2393
22
            int char_size = 0;
2394
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2395
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2396
129
            }
2397
22
            const auto start_byte_len = p - str_ref.begin();
2398
2399
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2400
14
                dest_column_data[i] = 0;
2401
14
            } else {
2402
8
                dest_column_data[i] =
2403
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2404
8
            }
2405
22
        }
2406
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
Line
Count
Source
2381
22
                  size_t size) const {
2382
44
        for (size_t i = 0; i < size; i++) {
2383
22
            const StringRef str_ref =
2384
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2385
22
            const StringRef pattern_ref =
2386
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2387
            // 1-based index
2388
22
            int32_t start_pos =
2389
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2390
2391
22
            const char* p = str_ref.begin();
2392
22
            const char* end = str_ref.end();
2393
22
            int char_size = 0;
2394
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2395
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2396
129
            }
2397
22
            const auto start_byte_len = p - str_ref.begin();
2398
2399
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2400
14
                dest_column_data[i] = 0;
2401
14
            } else {
2402
8
                dest_column_data[i] =
2403
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2404
8
            }
2405
22
        }
2406
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
Line
Count
Source
2381
22
                  size_t size) const {
2382
44
        for (size_t i = 0; i < size; i++) {
2383
22
            const StringRef str_ref =
2384
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2385
22
            const StringRef pattern_ref =
2386
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2387
            // 1-based index
2388
22
            int32_t start_pos =
2389
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2390
2391
22
            const char* p = str_ref.begin();
2392
22
            const char* end = str_ref.end();
2393
22
            int char_size = 0;
2394
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2395
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2396
129
            }
2397
22
            const auto start_byte_len = p - str_ref.begin();
2398
2399
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2400
14
                dest_column_data[i] = 0;
2401
14
            } else {
2402
8
                dest_column_data[i] =
2403
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2404
8
            }
2405
22
        }
2406
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
Line
Count
Source
2381
22
                  size_t size) const {
2382
44
        for (size_t i = 0; i < size; i++) {
2383
22
            const StringRef str_ref =
2384
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2385
22
            const StringRef pattern_ref =
2386
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2387
            // 1-based index
2388
22
            int32_t start_pos =
2389
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2390
2391
22
            const char* p = str_ref.begin();
2392
22
            const char* end = str_ref.end();
2393
22
            int char_size = 0;
2394
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2395
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2396
129
            }
2397
22
            const auto start_byte_len = p - str_ref.begin();
2398
2399
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2400
14
                dest_column_data[i] = 0;
2401
14
            } else {
2402
8
                dest_column_data[i] =
2403
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2404
8
            }
2405
22
        }
2406
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
Line
Count
Source
2381
22
                  size_t size) const {
2382
44
        for (size_t i = 0; i < size; i++) {
2383
22
            const StringRef str_ref =
2384
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2385
22
            const StringRef pattern_ref =
2386
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2387
            // 1-based index
2388
22
            int32_t start_pos =
2389
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2390
2391
22
            const char* p = str_ref.begin();
2392
22
            const char* end = str_ref.end();
2393
22
            int char_size = 0;
2394
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2395
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2396
129
            }
2397
22
            const auto start_byte_len = p - str_ref.begin();
2398
2399
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2400
14
                dest_column_data[i] = 0;
2401
14
            } else {
2402
8
                dest_column_data[i] =
2403
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2404
8
            }
2405
22
        }
2406
22
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEEm
2407
2408
208
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
2409
208
        size_t old_size = pos;
2410
208
        size_t str_size = str_ref.size;
2411
1.15k
        while (pos < str_size &&
2412
1.15k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
2413
1.06k
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
2414
948
            pos++;
2415
948
        }
2416
208
        return pos - old_size;
2417
208
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8find_posEmNS_9StringRefES3_
Line
Count
Source
2408
56
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
2409
56
        size_t old_size = pos;
2410
56
        size_t str_size = str_ref.size;
2411
372
        while (pos < str_size &&
2412
372
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
2413
344
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
2414
316
            pos++;
2415
316
        }
2416
56
        return pos - old_size;
2417
56
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8find_posEmNS_9StringRefES3_
Line
Count
Source
2408
152
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
2409
152
        size_t old_size = pos;
2410
152
        size_t str_size = str_ref.size;
2411
784
        while (pos < str_size &&
2412
784
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
2413
720
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
2414
632
            pos++;
2415
632
        }
2416
152
        return pos - old_size;
2417
152
    }
2418
2419
111
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
2420
111
        int count = 0;
2421
111
        if (str_ref.size == 0 || pattern_ref.size == 0) {
2422
19
            return 0;
2423
92
        } else {
2424
208
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2425
208
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
2426
208
                if (res_pos == (str_ref.size - str_pos)) {
2427
92
                    break; // not find
2428
92
                }
2429
116
                count++;
2430
116
                str_pos = str_pos + res_pos + pattern_ref.size;
2431
116
            }
2432
92
        }
2433
92
        return count;
2434
111
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE14find_str_countENS_9StringRefES3_
Line
Count
Source
2419
47
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
2420
47
        int count = 0;
2421
47
        if (str_ref.size == 0 || pattern_ref.size == 0) {
2422
19
            return 0;
2423
28
        } else {
2424
56
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2425
56
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
2426
56
                if (res_pos == (str_ref.size - str_pos)) {
2427
28
                    break; // not find
2428
28
                }
2429
28
                count++;
2430
28
                str_pos = str_pos + res_pos + pattern_ref.size;
2431
28
            }
2432
28
        }
2433
28
        return count;
2434
47
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE14find_str_countENS_9StringRefES3_
Line
Count
Source
2419
64
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
2420
64
        int count = 0;
2421
64
        if (str_ref.size == 0 || pattern_ref.size == 0) {
2422
0
            return 0;
2423
64
        } else {
2424
152
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2425
152
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
2426
152
                if (res_pos == (str_ref.size - str_pos)) {
2427
64
                    break; // not find
2428
64
                }
2429
88
                count++;
2430
88
                str_pos = str_pos + res_pos + pattern_ref.size;
2431
88
            }
2432
64
        }
2433
64
        return count;
2434
64
    }
2435
};
2436
2437
struct SM3Sum {
2438
    static constexpr auto name = "sm3sum";
2439
    using ObjectData = SM3Digest;
2440
};
2441
2442
struct MD5Sum {
2443
    static constexpr auto name = "md5sum";
2444
    using ObjectData = Md5Digest;
2445
};
2446
2447
template <typename Impl>
2448
class FunctionStringDigestMulti : public IFunction {
2449
public:
2450
    static constexpr auto name = Impl::name;
2451
208
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
_ZN5doris25FunctionStringDigestMultiINS_6SM3SumEE6createEv
Line
Count
Source
2451
104
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
_ZN5doris25FunctionStringDigestMultiINS_6MD5SumEE6createEv
Line
Count
Source
2451
104
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
2452
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE8get_nameB5cxx11Ev
2453
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE23get_number_of_argumentsEv
2454
206
    bool is_variadic() const override { return true; }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE11is_variadicEv
Line
Count
Source
2454
103
    bool is_variadic() const override { return true; }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE11is_variadicEv
Line
Count
Source
2454
103
    bool is_variadic() const override { return true; }
2455
2456
204
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2457
204
        return std::make_shared<DataTypeString>();
2458
204
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2456
102
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2457
102
        return std::make_shared<DataTypeString>();
2458
102
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2456
102
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2457
102
        return std::make_shared<DataTypeString>();
2458
102
    }
2459
2460
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2461
160
                        uint32_t result, size_t input_rows_count) const override {
2462
160
        DCHECK_GE(arguments.size(), 1);
2463
2464
160
        auto res = ColumnString::create();
2465
160
        auto& res_data = res->get_chars();
2466
160
        auto& res_offset = res->get_offsets();
2467
160
        res_offset.resize(input_rows_count);
2468
2469
160
        std::vector<ColumnPtr> argument_columns(arguments.size());
2470
160
        std::vector<uint8_t> is_const(arguments.size(), 0);
2471
536
        for (size_t i = 0; i < arguments.size(); ++i) {
2472
376
            std::tie(argument_columns[i], is_const[i]) =
2473
376
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2474
376
        }
2475
2476
160
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
2477
80
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
2478
80
                                         res_data, res_offset);
2479
80
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
2480
80
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
2481
80
                                            res_data, res_offset);
2482
80
        } else {
2483
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2484
0
                                        argument_columns[0]->get_name(), get_name());
2485
0
        }
2486
2487
160
        block.replace_by_position(result, std::move(res));
2488
160
        return Status::OK();
2489
160
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2461
80
                        uint32_t result, size_t input_rows_count) const override {
2462
80
        DCHECK_GE(arguments.size(), 1);
2463
2464
80
        auto res = ColumnString::create();
2465
80
        auto& res_data = res->get_chars();
2466
80
        auto& res_offset = res->get_offsets();
2467
80
        res_offset.resize(input_rows_count);
2468
2469
80
        std::vector<ColumnPtr> argument_columns(arguments.size());
2470
80
        std::vector<uint8_t> is_const(arguments.size(), 0);
2471
268
        for (size_t i = 0; i < arguments.size(); ++i) {
2472
188
            std::tie(argument_columns[i], is_const[i]) =
2473
188
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2474
188
        }
2475
2476
80
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
2477
40
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
2478
40
                                         res_data, res_offset);
2479
40
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
2480
40
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
2481
40
                                            res_data, res_offset);
2482
40
        } else {
2483
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2484
0
                                        argument_columns[0]->get_name(), get_name());
2485
0
        }
2486
2487
80
        block.replace_by_position(result, std::move(res));
2488
80
        return Status::OK();
2489
80
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2461
80
                        uint32_t result, size_t input_rows_count) const override {
2462
80
        DCHECK_GE(arguments.size(), 1);
2463
2464
80
        auto res = ColumnString::create();
2465
80
        auto& res_data = res->get_chars();
2466
80
        auto& res_offset = res->get_offsets();
2467
80
        res_offset.resize(input_rows_count);
2468
2469
80
        std::vector<ColumnPtr> argument_columns(arguments.size());
2470
80
        std::vector<uint8_t> is_const(arguments.size(), 0);
2471
268
        for (size_t i = 0; i < arguments.size(); ++i) {
2472
188
            std::tie(argument_columns[i], is_const[i]) =
2473
188
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2474
188
        }
2475
2476
80
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
2477
40
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
2478
40
                                         res_data, res_offset);
2479
40
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
2480
40
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
2481
40
                                            res_data, res_offset);
2482
40
        } else {
2483
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2484
0
                                        argument_columns[0]->get_name(), get_name());
2485
0
        }
2486
2487
80
        block.replace_by_position(result, std::move(res));
2488
80
        return Status::OK();
2489
80
    }
2490
2491
private:
2492
    template <typename ColumnType>
2493
    void vector_execute(Block& block, size_t input_rows_count,
2494
                        const std::vector<ColumnPtr>& argument_columns,
2495
                        const std::vector<uint8_t>& is_const, ColumnString::Chars& res_data,
2496
160
                        ColumnString::Offsets& res_offset) const {
2497
160
        using ObjectData = typename Impl::ObjectData;
2498
372
        for (size_t i = 0; i < input_rows_count; ++i) {
2499
212
            ObjectData digest;
2500
676
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2501
464
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2502
464
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2503
464
                if (data_ref.size < 1) {
2504
120
                    continue;
2505
120
                }
2506
344
                digest.update(data_ref.data, data_ref.size);
2507
344
            }
2508
212
            digest.digest();
2509
212
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2510
212
                                        i, res_data, res_offset);
2511
212
        }
2512
160
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE
Line
Count
Source
2496
40
                        ColumnString::Offsets& res_offset) const {
2497
40
        using ObjectData = typename Impl::ObjectData;
2498
93
        for (size_t i = 0; i < input_rows_count; ++i) {
2499
53
            ObjectData digest;
2500
169
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2501
116
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2502
116
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2503
116
                if (data_ref.size < 1) {
2504
30
                    continue;
2505
30
                }
2506
86
                digest.update(data_ref.data, data_ref.size);
2507
86
            }
2508
53
            digest.digest();
2509
53
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2510
53
                                        i, res_data, res_offset);
2511
53
        }
2512
40
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE
Line
Count
Source
2496
40
                        ColumnString::Offsets& res_offset) const {
2497
40
        using ObjectData = typename Impl::ObjectData;
2498
93
        for (size_t i = 0; i < input_rows_count; ++i) {
2499
53
            ObjectData digest;
2500
169
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2501
116
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2502
116
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2503
116
                if (data_ref.size < 1) {
2504
30
                    continue;
2505
30
                }
2506
86
                digest.update(data_ref.data, data_ref.size);
2507
86
            }
2508
53
            digest.digest();
2509
53
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2510
53
                                        i, res_data, res_offset);
2511
53
        }
2512
40
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE
Line
Count
Source
2496
40
                        ColumnString::Offsets& res_offset) const {
2497
40
        using ObjectData = typename Impl::ObjectData;
2498
93
        for (size_t i = 0; i < input_rows_count; ++i) {
2499
53
            ObjectData digest;
2500
169
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2501
116
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2502
116
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2503
116
                if (data_ref.size < 1) {
2504
30
                    continue;
2505
30
                }
2506
86
                digest.update(data_ref.data, data_ref.size);
2507
86
            }
2508
53
            digest.digest();
2509
53
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2510
53
                                        i, res_data, res_offset);
2511
53
        }
2512
40
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE
Line
Count
Source
2496
40
                        ColumnString::Offsets& res_offset) const {
2497
40
        using ObjectData = typename Impl::ObjectData;
2498
93
        for (size_t i = 0; i < input_rows_count; ++i) {
2499
53
            ObjectData digest;
2500
169
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2501
116
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2502
116
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2503
116
                if (data_ref.size < 1) {
2504
30
                    continue;
2505
30
                }
2506
86
                digest.update(data_ref.data, data_ref.size);
2507
86
            }
2508
53
            digest.digest();
2509
53
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2510
53
                                        i, res_data, res_offset);
2511
53
        }
2512
40
    }
2513
};
2514
2515
class FunctionStringDigestSHA1 : public IFunction {
2516
public:
2517
    static constexpr auto name = "sha1";
2518
14
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA1>(); }
2519
0
    String get_name() const override { return name; }
2520
0
    size_t get_number_of_arguments() const override { return 1; }
2521
13
    bool is_variadic() const override { return true; }
2522
2523
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2524
12
        return std::make_shared<DataTypeString>();
2525
12
    }
2526
2527
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2528
10
                        uint32_t result, size_t input_rows_count) const override {
2529
10
        DCHECK_EQ(arguments.size(), 1);
2530
10
        ColumnPtr data_col = block.get_by_position(arguments[0]).column;
2531
2532
10
        auto res_col = ColumnString::create();
2533
10
        auto& res_data = res_col->get_chars();
2534
10
        auto& res_offset = res_col->get_offsets();
2535
10
        res_offset.resize(input_rows_count);
2536
10
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
2537
5
            vector_execute(str_col, input_rows_count, res_data, res_offset);
2538
5
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
2539
5
            vector_execute(vb_col, input_rows_count, res_data, res_offset);
2540
5
        } else {
2541
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2542
0
                                        data_col->get_name(), get_name());
2543
0
        }
2544
2545
10
        block.replace_by_position(result, std::move(res_col));
2546
10
        return Status::OK();
2547
10
    }
2548
2549
private:
2550
    template <typename ColumnType>
2551
    void vector_execute(const ColumnType* col, size_t input_rows_count,
2552
10
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2553
10
        SHA1Digest digest;
2554
28
        for (size_t i = 0; i < input_rows_count; ++i) {
2555
18
            StringRef data_ref = col->get_data_at(i);
2556
18
            digest.reset(data_ref.data, data_ref.size);
2557
18
            std::string_view ans = digest.digest();
2558
2559
18
            StringOP::push_value_string(ans, i, res_data, res_offset);
2560
18
        }
2561
10
    }
_ZNK5doris24FunctionStringDigestSHA114vector_executeINS_9ColumnStrIjEEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
Line
Count
Source
2552
5
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2553
5
        SHA1Digest digest;
2554
14
        for (size_t i = 0; i < input_rows_count; ++i) {
2555
9
            StringRef data_ref = col->get_data_at(i);
2556
9
            digest.reset(data_ref.data, data_ref.size);
2557
9
            std::string_view ans = digest.digest();
2558
2559
9
            StringOP::push_value_string(ans, i, res_data, res_offset);
2560
9
        }
2561
5
    }
_ZNK5doris24FunctionStringDigestSHA114vector_executeINS_15ColumnVarbinaryEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS6_IjLm4096ES9_Lm16ELm15EEE
Line
Count
Source
2552
5
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2553
5
        SHA1Digest digest;
2554
14
        for (size_t i = 0; i < input_rows_count; ++i) {
2555
9
            StringRef data_ref = col->get_data_at(i);
2556
9
            digest.reset(data_ref.data, data_ref.size);
2557
9
            std::string_view ans = digest.digest();
2558
2559
9
            StringOP::push_value_string(ans, i, res_data, res_offset);
2560
9
        }
2561
5
    }
2562
};
2563
2564
class FunctionStringDigestSHA2 : public IFunction {
2565
public:
2566
    static constexpr auto name = "sha2";
2567
2
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA2>(); }
2568
0
    String get_name() const override { return name; }
2569
0
    size_t get_number_of_arguments() const override { return 2; }
2570
1
    bool is_variadic() const override { return true; }
2571
2572
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2573
0
        return std::make_shared<DataTypeString>();
2574
0
    }
2575
2576
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2577
0
                        uint32_t result, size_t input_rows_count) const override {
2578
0
        DCHECK(!is_column_const(*block.get_by_position(arguments[0]).column));
2579
2580
0
        ColumnPtr data_col = block.get_by_position(arguments[0]).column;
2581
2582
0
        [[maybe_unused]] const auto& [right_column, right_const] =
2583
0
                unpack_if_const(block.get_by_position(arguments[1]).column);
2584
0
        auto digest_length = assert_cast<const ColumnInt32*>(right_column.get())->get_data()[0];
2585
2586
0
        auto res_col = ColumnString::create();
2587
0
        auto& res_data = res_col->get_chars();
2588
0
        auto& res_offset = res_col->get_offsets();
2589
0
        res_offset.resize(input_rows_count);
2590
2591
0
        if (digest_length == 224) {
2592
0
            execute_base<SHA224Digest>(data_col, input_rows_count, res_data, res_offset);
2593
0
        } else if (digest_length == 256) {
2594
0
            execute_base<SHA256Digest>(data_col, input_rows_count, res_data, res_offset);
2595
0
        } else if (digest_length == 384) {
2596
0
            execute_base<SHA384Digest>(data_col, input_rows_count, res_data, res_offset);
2597
0
        } else if (digest_length == 512) {
2598
0
            execute_base<SHA512Digest>(data_col, input_rows_count, res_data, res_offset);
2599
0
        } else {
2600
0
            return Status::InvalidArgument(
2601
0
                    "sha2's digest length only support 224/256/384/512 but meet {}", digest_length);
2602
0
        }
2603
2604
0
        block.replace_by_position(result, std::move(res_col));
2605
0
        return Status::OK();
2606
0
    }
2607
2608
private:
2609
    template <typename T>
2610
    void execute_base(ColumnPtr data_col, int input_rows_count, ColumnString::Chars& res_data,
2611
0
                      ColumnString::Offsets& res_offset) const {
2612
0
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
2613
0
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
2614
0
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
2615
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
2616
0
        } else {
2617
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
2618
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
2619
0
                            get_name());
2620
0
        }
2621
0
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA224DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA256DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA384DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA512DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
2622
2623
    template <typename DigestType, typename ColumnType>
2624
    void vector_execute(const ColumnType* col, size_t input_rows_count,
2625
0
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2626
0
        DigestType digest;
2627
0
        for (size_t i = 0; i < input_rows_count; ++i) {
2628
0
            StringRef data_ref = col->get_data_at(i);
2629
0
            digest.reset(data_ref.data, data_ref.size);
2630
0
            std::string_view ans = digest.digest();
2631
2632
0
            StringOP::push_value_string(ans, i, res_data, res_offset);
2633
0
        }
2634
0
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
2635
};
2636
2637
class FunctionExtractURLParameter : public IFunction {
2638
public:
2639
    static constexpr auto name = "extract_url_parameter";
2640
39
    static FunctionPtr create() { return std::make_shared<FunctionExtractURLParameter>(); }
2641
1
    String get_name() const override { return name; }
2642
37
    size_t get_number_of_arguments() const override { return 2; }
2643
2644
37
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2645
37
        return std::make_shared<DataTypeString>();
2646
37
    }
2647
2648
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2649
37
                        uint32_t result, size_t input_rows_count) const override {
2650
37
        auto col_url =
2651
37
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
2652
37
        auto col_parameter =
2653
37
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
2654
37
        auto url_col = assert_cast<const ColumnString*>(col_url.get());
2655
37
        auto parameter_col = assert_cast<const ColumnString*>(col_parameter.get());
2656
2657
37
        ColumnString::MutablePtr col_res = ColumnString::create();
2658
2659
85
        for (int i = 0; i < input_rows_count; ++i) {
2660
48
            auto source = url_col->get_data_at(i);
2661
48
            auto param = parameter_col->get_data_at(i);
2662
48
            auto res = extract_url(source, param);
2663
2664
48
            col_res->insert_data(res.data, res.size);
2665
48
        }
2666
2667
37
        block.replace_by_position(result, std::move(col_res));
2668
37
        return Status::OK();
2669
37
    }
2670
2671
private:
2672
48
    StringRef extract_url(StringRef url, StringRef parameter) const {
2673
48
        if (url.size == 0 || parameter.size == 0) {
2674
8
            return StringRef("", 0);
2675
8
        }
2676
40
        return UrlParser::extract_url(url, parameter);
2677
48
    }
2678
};
2679
2680
class FunctionStringParseUrl : public IFunction {
2681
public:
2682
    static constexpr auto name = "parse_url";
2683
95
    static FunctionPtr create() { return std::make_shared<FunctionStringParseUrl>(); }
2684
0
    String get_name() const override { return name; }
2685
0
    size_t get_number_of_arguments() const override { return 0; }
2686
94
    bool is_variadic() const override { return true; }
2687
2688
93
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2689
93
        return make_nullable(std::make_shared<DataTypeString>());
2690
93
    }
2691
2692
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2693
93
                        uint32_t result, size_t input_rows_count) const override {
2694
93
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2695
93
        auto& null_map_data = null_map->get_data();
2696
93
        DCHECK_GE(3, arguments.size());
2697
93
        auto res = ColumnString::create();
2698
93
        auto& res_offsets = res->get_offsets();
2699
93
        auto& res_chars = res->get_chars();
2700
93
        res_offsets.resize(input_rows_count);
2701
2702
93
        size_t argument_size = arguments.size();
2703
93
        const bool has_key = argument_size == 3;
2704
2705
93
        std::vector<ColumnPtr> argument_columns(argument_size);
2706
93
        std::vector<UInt8> col_const(argument_size);
2707
308
        for (size_t i = 0; i < argument_size; ++i) {
2708
215
            std::tie(argument_columns[i], col_const[i]) =
2709
215
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2710
215
        }
2711
2712
93
        const auto* url_col = assert_cast<const ColumnString*>(argument_columns[0].get());
2713
93
        const auto* part_col = assert_cast<const ColumnString*>(argument_columns[1].get());
2714
93
        const bool part_const = col_const[1];
2715
93
        std::vector<UrlParser::UrlPart> url_parts;
2716
93
        const int part_nums = part_const ? 1 : input_rows_count;
2717
2718
93
        url_parts.resize(part_nums);
2719
209
        for (int i = 0; i < part_nums; i++) {
2720
116
            StringRef part = part_col->get_data_at(i);
2721
116
            UrlParser::UrlPart url_part = UrlParser::get_url_part(part);
2722
116
            if (url_part == UrlParser::INVALID) {
2723
0
                return Status::RuntimeError("Invalid URL part: {}\n{}",
2724
0
                                            std::string(part.data, part.size),
2725
0
                                            "(Valid URL parts are 'PROTOCOL', 'HOST', "
2726
0
                                            "'PATH', 'REF', 'AUTHORITY', "
2727
0
                                            "'FILE', 'USERINFO', 'PORT' and 'QUERY')");
2728
0
            }
2729
116
            url_parts[i] = url_part;
2730
116
        }
2731
2732
93
        if (has_key) {
2733
29
            const bool url_const = col_const[0];
2734
29
            const bool key_const = col_const[2];
2735
29
            const auto* key_col = assert_cast<const ColumnString*>(argument_columns[2].get());
2736
29
            RETURN_IF_ERROR(std::visit(
2737
29
                    [&](auto url_const, auto part_const, auto key_const) {
2738
29
                        return vector_parse_key<url_const, part_const, key_const>(
2739
29
                                url_col, url_parts, key_col, input_rows_count, null_map_data,
2740
29
                                res_chars, res_offsets);
2741
29
                    },
2742
29
                    make_bool_variant(url_const), make_bool_variant(part_const),
2743
29
                    make_bool_variant(key_const)));
2744
64
        } else {
2745
64
            const bool url_const = col_const[0];
2746
64
            RETURN_IF_ERROR(std::visit(
2747
64
                    [&](auto url_const, auto part_const) {
2748
64
                        return vector_parse<url_const, part_const>(url_col, url_parts,
2749
64
                                                                   input_rows_count, null_map_data,
2750
64
                                                                   res_chars, res_offsets);
2751
64
                    },
2752
64
                    make_bool_variant(url_const), make_bool_variant(part_const)));
2753
64
        }
2754
93
        block.get_by_position(result).column =
2755
93
                ColumnNullable::create(std::move(res), std::move(null_map));
2756
93
        return Status::OK();
2757
93
    }
2758
    template <bool url_const, bool part_const>
2759
    static Status vector_parse(const ColumnString* url_col,
2760
                               std::vector<UrlParser::UrlPart>& url_parts, const int size,
2761
                               ColumnUInt8::Container& null_map_data,
2762
64
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2763
148
        for (size_t i = 0; i < size; ++i) {
2764
84
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2765
84
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2766
84
            StringRef parse_res;
2767
84
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2768
64
                if (parse_res.empty()) [[unlikely]] {
2769
4
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2770
4
                    continue;
2771
4
                }
2772
60
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2773
60
                                            res_chars, res_offsets);
2774
60
            } else {
2775
20
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2776
20
            }
2777
84
        }
2778
64
        return Status::OK();
2779
64
    }
_ZN5doris22FunctionStringParseUrl12vector_parseILb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2762
22
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2763
64
        for (size_t i = 0; i < size; ++i) {
2764
42
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2765
42
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2766
42
            StringRef parse_res;
2767
42
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2768
32
                if (parse_res.empty()) [[unlikely]] {
2769
2
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2770
2
                    continue;
2771
2
                }
2772
30
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2773
30
                                            res_chars, res_offsets);
2774
30
            } else {
2775
10
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2776
10
            }
2777
42
        }
2778
22
        return Status::OK();
2779
22
    }
Unexecuted instantiation: _ZN5doris22FunctionStringParseUrl12vector_parseILb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
_ZN5doris22FunctionStringParseUrl12vector_parseILb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2762
21
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2763
42
        for (size_t i = 0; i < size; ++i) {
2764
21
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2765
21
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2766
21
            StringRef parse_res;
2767
21
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2768
16
                if (parse_res.empty()) [[unlikely]] {
2769
1
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2770
1
                    continue;
2771
1
                }
2772
15
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2773
15
                                            res_chars, res_offsets);
2774
15
            } else {
2775
5
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2776
5
            }
2777
21
        }
2778
21
        return Status::OK();
2779
21
    }
_ZN5doris22FunctionStringParseUrl12vector_parseILb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2762
21
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2763
42
        for (size_t i = 0; i < size; ++i) {
2764
21
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2765
21
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2766
21
            StringRef parse_res;
2767
21
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2768
16
                if (parse_res.empty()) [[unlikely]] {
2769
1
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2770
1
                    continue;
2771
1
                }
2772
15
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2773
15
                                            res_chars, res_offsets);
2774
15
            } else {
2775
5
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2776
5
            }
2777
21
        }
2778
21
        return Status::OK();
2779
21
    }
2780
    template <bool url_const, bool part_const, bool key_const>
2781
    static Status vector_parse_key(const ColumnString* url_col,
2782
                                   std::vector<UrlParser::UrlPart>& url_parts,
2783
                                   const ColumnString* key_col, const int size,
2784
                                   ColumnUInt8::Container& null_map_data,
2785
                                   ColumnString::Chars& res_chars,
2786
29
                                   ColumnString::Offsets& res_offsets) {
2787
61
        for (size_t i = 0; i < size; ++i) {
2788
32
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2789
32
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2790
32
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2791
32
            StringRef parse_res;
2792
32
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2793
16
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2794
16
                                            res_chars, res_offsets);
2795
16
            } else {
2796
16
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2797
16
                continue;
2798
16
            }
2799
32
        }
2800
29
        return Status::OK();
2801
29
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2786
5
                                   ColumnString::Offsets& res_offsets) {
2787
13
        for (size_t i = 0; i < size; ++i) {
2788
8
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2789
8
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2790
8
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2791
8
            StringRef parse_res;
2792
8
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2793
4
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2794
4
                                            res_chars, res_offsets);
2795
4
            } else {
2796
4
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2797
4
                continue;
2798
4
            }
2799
8
        }
2800
5
        return Status::OK();
2801
5
    }
Unexecuted instantiation: _ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2786
4
                                   ColumnString::Offsets& res_offsets) {
2787
8
        for (size_t i = 0; i < size; ++i) {
2788
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2789
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2790
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2791
4
            StringRef parse_res;
2792
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2793
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2794
2
                                            res_chars, res_offsets);
2795
2
            } else {
2796
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2797
2
                continue;
2798
2
            }
2799
4
        }
2800
4
        return Status::OK();
2801
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2786
4
                                   ColumnString::Offsets& res_offsets) {
2787
8
        for (size_t i = 0; i < size; ++i) {
2788
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2789
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2790
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2791
4
            StringRef parse_res;
2792
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2793
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2794
2
                                            res_chars, res_offsets);
2795
2
            } else {
2796
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2797
2
                continue;
2798
2
            }
2799
4
        }
2800
4
        return Status::OK();
2801
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2786
4
                                   ColumnString::Offsets& res_offsets) {
2787
8
        for (size_t i = 0; i < size; ++i) {
2788
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2789
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2790
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2791
4
            StringRef parse_res;
2792
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2793
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2794
2
                                            res_chars, res_offsets);
2795
2
            } else {
2796
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2797
2
                continue;
2798
2
            }
2799
4
        }
2800
4
        return Status::OK();
2801
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2786
4
                                   ColumnString::Offsets& res_offsets) {
2787
8
        for (size_t i = 0; i < size; ++i) {
2788
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2789
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2790
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2791
4
            StringRef parse_res;
2792
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2793
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2794
2
                                            res_chars, res_offsets);
2795
2
            } else {
2796
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2797
2
                continue;
2798
2
            }
2799
4
        }
2800
4
        return Status::OK();
2801
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2786
4
                                   ColumnString::Offsets& res_offsets) {
2787
8
        for (size_t i = 0; i < size; ++i) {
2788
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2789
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2790
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2791
4
            StringRef parse_res;
2792
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2793
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2794
2
                                            res_chars, res_offsets);
2795
2
            } else {
2796
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2797
2
                continue;
2798
2
            }
2799
4
        }
2800
4
        return Status::OK();
2801
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2786
4
                                   ColumnString::Offsets& res_offsets) {
2787
8
        for (size_t i = 0; i < size; ++i) {
2788
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2789
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2790
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2791
4
            StringRef parse_res;
2792
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2793
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2794
2
                                            res_chars, res_offsets);
2795
2
            } else {
2796
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2797
2
                continue;
2798
2
            }
2799
4
        }
2800
4
        return Status::OK();
2801
4
    }
2802
};
2803
2804
class FunctionUrlDecode : public IFunction {
2805
public:
2806
    static constexpr auto name = "url_decode";
2807
2
    static FunctionPtr create() { return std::make_shared<FunctionUrlDecode>(); }
2808
1
    String get_name() const override { return name; }
2809
0
    size_t get_number_of_arguments() const override { return 1; }
2810
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2811
0
        return std::make_shared<DataTypeString>();
2812
0
    }
2813
2814
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2815
0
                        uint32_t result, size_t input_rows_count) const override {
2816
0
        auto res = ColumnString::create();
2817
0
        res->get_offsets().reserve(input_rows_count);
2818
2819
0
        const auto* url_col =
2820
0
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
2821
2822
0
        std::string decoded_url;
2823
0
        for (size_t i = 0; i < input_rows_count; ++i) {
2824
0
            auto url = url_col->get_data_at(i);
2825
0
            if (!url_decode(url.to_string(), &decoded_url)) {
2826
0
                return Status::InternalError("Decode url failed");
2827
0
            }
2828
0
            res->insert_data(decoded_url.data(), decoded_url.size());
2829
0
            decoded_url.clear();
2830
0
        }
2831
2832
0
        block.get_by_position(result).column = std::move(res);
2833
0
        return Status::OK();
2834
0
    }
2835
};
2836
2837
class FunctionUrlEncode : public IFunction {
2838
public:
2839
    static constexpr auto name = "url_encode";
2840
6
    static FunctionPtr create() { return std::make_shared<FunctionUrlEncode>(); }
2841
1
    String get_name() const override { return name; }
2842
4
    size_t get_number_of_arguments() const override { return 1; }
2843
4
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2844
4
        return std::make_shared<DataTypeString>();
2845
4
    }
2846
2847
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2848
4
                        uint32_t result, size_t input_rows_count) const override {
2849
4
        auto res = ColumnString::create();
2850
4
        res->get_offsets().reserve(input_rows_count);
2851
2852
4
        const auto* url_col =
2853
4
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
2854
2855
4
        std::string encoded_url;
2856
10
        for (size_t i = 0; i < input_rows_count; ++i) {
2857
6
            auto url = url_col->get_data_at(i);
2858
6
            url_encode(url.to_string_view(), &encoded_url);
2859
6
            res->insert_data(encoded_url.data(), encoded_url.size());
2860
6
            encoded_url.clear();
2861
6
        }
2862
2863
4
        block.get_by_position(result).column = std::move(res);
2864
4
        return Status::OK();
2865
4
    }
2866
};
2867
2868
class FunctionRandomBytes : public IFunction {
2869
public:
2870
    static constexpr auto name = "random_bytes";
2871
2
    static FunctionPtr create() { return std::make_shared<FunctionRandomBytes>(); }
2872
1
    String get_name() const override { return name; }
2873
0
    size_t get_number_of_arguments() const override { return 1; }
2874
1
    bool is_variadic() const override { return false; }
2875
2876
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2877
0
        return std::make_shared<DataTypeString>();
2878
0
    }
2879
2880
0
    bool use_default_implementation_for_constants() const final { return false; }
2881
2882
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2883
0
                        uint32_t result, size_t input_rows_count) const override {
2884
0
        auto res = ColumnString::create();
2885
0
        auto& res_offsets = res->get_offsets();
2886
0
        auto& res_chars = res->get_chars();
2887
0
        res_offsets.resize(input_rows_count);
2888
2889
0
        auto [arg_col, arg_const] = unpack_if_const(block.get_by_position(arguments[0]).column);
2890
0
        const auto* length_col = assert_cast<const ColumnInt32*>(arg_col.get());
2891
2892
0
        if (arg_const) {
2893
0
            res_chars.reserve(input_rows_count * (length_col->get_element(0) + 2));
2894
0
        }
2895
2896
0
        std::vector<uint8_t, Allocator_<uint8_t>> random_bytes;
2897
0
        std::random_device rd;
2898
0
        std::mt19937 gen(rd());
2899
2900
0
        std::uniform_int_distribution<unsigned short> distribution(0, 255);
2901
0
        for (size_t i = 0; i < input_rows_count; ++i) {
2902
0
            size_t index = index_check_const(i, arg_const);
2903
0
            if (length_col->get_element(index) < 0) [[unlikely]] {
2904
0
                return Status::InvalidArgument("argument {} of function {} at row {} was invalid.",
2905
0
                                               length_col->get_element(index), name, index);
2906
0
            }
2907
0
            random_bytes.resize(length_col->get_element(index));
2908
2909
0
            for (auto& byte : random_bytes) {
2910
0
                byte = distribution(gen) & 0xFF;
2911
0
            }
2912
2913
0
            std::basic_ostringstream<char, std::char_traits<char>, Allocator_<char>> oss;
2914
0
            for (const auto& byte : random_bytes) {
2915
0
                oss << std::setw(2) << std::setfill('0') << std::hex << static_cast<int>(byte);
2916
0
            }
2917
2918
0
            StringOP::push_value_string("0x" + oss.str(), i, res_chars, res_offsets);
2919
0
            random_bytes.clear();
2920
0
        }
2921
2922
0
        block.get_by_position(result).column = std::move(res);
2923
2924
0
        return Status::OK();
2925
0
    }
2926
};
2927
2928
template <typename Impl>
2929
class FunctionMoneyFormat : public IFunction {
2930
public:
2931
    static constexpr auto name = "money_format";
2932
25
    // Factory: builds a shared instance of this FunctionMoneyFormat<Impl> specialization.
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE6createEv
Line
Count
Source
2932
4
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE6createEv
Line
Count
Source
2932
3
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE6createEv
Line
Count
Source
2932
3
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE6createEv
Line
Count
Source
2932
3
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE6createEv
Line
Count
Source
2932
2
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE6createEv
Line
Count
Source
2932
6
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE6createEv
Line
Count
Source
2932
2
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE6createEv
Line
Count
Source
2932
2
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
2933
8
    // Returns the function name stored in `name` ("money_format").
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE8get_nameB5cxx11Ev
Line
Count
Source
2933
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE8get_nameB5cxx11Ev
Line
Count
Source
2933
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE8get_nameB5cxx11Ev
Line
Count
Source
2933
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE8get_nameB5cxx11Ev
Line
Count
Source
2933
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE8get_nameB5cxx11Ev
Line
Count
Source
2933
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE8get_nameB5cxx11Ev
Line
Count
Source
2933
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE8get_nameB5cxx11Ev
Line
Count
Source
2933
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE8get_nameB5cxx11Ev
Line
Count
Source
2933
1
    String get_name() const override { return name; }
2934
2935
8
    // Result type of money_format: always a string, regardless of the argument type.
    // Throws doris::Exception(INVALID_ARGUMENT) unless exactly one argument type is supplied.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2936
8
        // Reject any arity other than one before reporting the return type.
        if (arguments.size() != 1) {
2937
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2938
0
                                   "Function {} requires exactly 1 argument", name);
2939
0
        }
2940
2941
8
        return std::make_shared<DataTypeString>();
2942
8
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
2935
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2936
1
        if (arguments.size() != 1) {
2937
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2938
0
                                   "Function {} requires exactly 1 argument", name);
2939
0
        }
2940
2941
1
        return std::make_shared<DataTypeString>();
2942
1
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2935
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2936
1
        if (arguments.size() != 1) {
2937
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2938
0
                                   "Function {} requires exactly 1 argument", name);
2939
0
        }
2940
2941
1
        return std::make_shared<DataTypeString>();
2942
1
    }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2935
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2936
1
        if (arguments.size() != 1) {
2937
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2938
0
                                   "Function {} requires exactly 1 argument", name);
2939
0
        }
2940
2941
1
        return std::make_shared<DataTypeString>();
2942
1
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2935
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2936
1
        if (arguments.size() != 1) {
2937
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2938
0
                                   "Function {} requires exactly 1 argument", name);
2939
0
        }
2940
2941
1
        return std::make_shared<DataTypeString>();
2942
1
    }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
2935
4
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2936
4
        if (arguments.size() != 1) {
2937
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2938
0
                                   "Function {} requires exactly 1 argument", name);
2939
0
        }
2940
2941
4
        return std::make_shared<DataTypeString>();
2942
4
    }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
2943
8
    // Forwards to Impl, which supplies the argument type list for this specialization.
    DataTypes get_variadic_argument_types_impl() const override {
2944
8
        return Impl::get_variadic_argument_types();
2945
8
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2943
1
    DataTypes get_variadic_argument_types_impl() const override {
2944
1
        return Impl::get_variadic_argument_types();
2945
1
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE32get_variadic_argument_types_implEv
Line
Count
Source
2943
1
    DataTypes get_variadic_argument_types_impl() const override {
2944
1
        return Impl::get_variadic_argument_types();
2945
1
    }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
2943
1
    DataTypes get_variadic_argument_types_impl() const override {
2944
1
        return Impl::get_variadic_argument_types();
2945
1
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
2943
1
    DataTypes get_variadic_argument_types_impl() const override {
2944
1
        return Impl::get_variadic_argument_types();
2945
1
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2943
1
    DataTypes get_variadic_argument_types_impl() const override {
2944
1
        return Impl::get_variadic_argument_types();
2945
1
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2943
1
    DataTypes get_variadic_argument_types_impl() const override {
2944
1
        return Impl::get_variadic_argument_types();
2945
1
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2943
1
    DataTypes get_variadic_argument_types_impl() const override {
2944
1
        return Impl::get_variadic_argument_types();
2945
1
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2943
1
    DataTypes get_variadic_argument_types_impl() const override {
2944
1
        return Impl::get_variadic_argument_types();
2945
1
    }
2946
8
    // money_format takes a single argument.
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE23get_number_of_argumentsEv
Line
Count
Source
2946
1
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE23get_number_of_argumentsEv
Line
Count
Source
2946
1
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE23get_number_of_argumentsEv
Line
Count
Source
2946
1
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE23get_number_of_argumentsEv
Line
Count
Source
2946
1
    size_t get_number_of_arguments() const override { return 1; }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE23get_number_of_argumentsEv
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE23get_number_of_argumentsEv
Line
Count
Source
2946
4
    size_t get_number_of_arguments() const override { return 1; }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE23get_number_of_argumentsEv
2947
2948
    // Formats the column at arguments[0] into a new string column and stores it at
    // position `result` of `block`. The per-type formatting is delegated to Impl::execute.
    // Always returns Status::OK(); Impl::execute's outcome is not inspected here.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2949
8
                        uint32_t result, size_t input_rows_count) const override {
2950
8
        auto res_column = ColumnString::create();
2951
8
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2952
2953
8
        // Raw, non-owning view of the freshly created column for Impl to fill.
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2954
2955
8
        Impl::execute(context, result_column, argument_column, input_rows_count);
2956
2957
8
        // Ownership of the filled column moves into the block.
        block.replace_by_position(result, std::move(res_column));
2958
8
        return Status::OK();
2959
8
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2949
2
                        uint32_t result, size_t input_rows_count) const override {
2950
2
        auto res_column = ColumnString::create();
2951
2
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2952
2953
2
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2954
2955
2
        Impl::execute(context, result_column, argument_column, input_rows_count);
2956
2957
2
        block.replace_by_position(result, std::move(res_column));
2958
2
        return Status::OK();
2959
2
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2949
1
                        uint32_t result, size_t input_rows_count) const override {
2950
1
        auto res_column = ColumnString::create();
2951
1
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2952
2953
1
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2954
2955
1
        Impl::execute(context, result_column, argument_column, input_rows_count);
2956
2957
1
        block.replace_by_position(result, std::move(res_column));
2958
1
        return Status::OK();
2959
1
    }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2949
1
                        uint32_t result, size_t input_rows_count) const override {
2950
1
        auto res_column = ColumnString::create();
2951
1
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2952
2953
1
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2954
2955
1
        Impl::execute(context, result_column, argument_column, input_rows_count);
2956
2957
1
        block.replace_by_position(result, std::move(res_column));
2958
1
        return Status::OK();
2959
1
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2949
1
                        uint32_t result, size_t input_rows_count) const override {
2950
1
        auto res_column = ColumnString::create();
2951
1
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2952
2953
1
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2954
2955
1
        Impl::execute(context, result_column, argument_column, input_rows_count);
2956
2957
1
        block.replace_by_position(result, std::move(res_column));
2958
1
        return Status::OK();
2959
1
    }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2949
3
                        uint32_t result, size_t input_rows_count) const override {
2950
3
        auto res_column = ColumnString::create();
2951
3
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2952
2953
3
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2954
2955
3
        Impl::execute(context, result_column, argument_column, input_rows_count);
2956
2957
3
        block.replace_by_position(result, std::move(res_column));
2958
3
        return Status::OK();
2959
3
    }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
2960
};
2961
2962
// ----------------------------------------------------------------------
2963
// SimpleItoaWithCommas()
2964
//    Description: converts an integer to its decimal text form,
2965
//    inserting a comma between every group of 3 digits.
2966
//    Faster than printf("%d")?
2967
//
//    The text is written right-to-left so that it ends at buffer + buffer_size.
//    No terminating NUL is written and no bounds check is performed, so the
//    caller must supply a buffer large enough for the longest possible output
//    and derive the length from the returned pointer.
//
2968
//    Return value: pointer to the first character of the text inside buffer.
2969
// ----------------------------------------------------------------------
2970
template <typename T>
2971
56
char* SimpleItoaWithCommas(T i, char* buffer, int32_t buffer_size) {
2972
56
    // Start one past the end of the buffer and fill backwards.
    char* p = buffer + buffer_size;
2973
    // Need to use unsigned T instead of T to correctly handle the most negative
    // value: negating it as a signed T would overflow, while 0 - n on the
    // unsigned type yields its magnitude.
2974
56
    std::make_unsigned_t<T> n = i;
2975
56
    if (i < 0) {
2976
20
        n = 0 - n;
2977
20
    }
2978
56
    *--p = '0' + n % 10; // this case deals with the number "0"
2979
56
    n /= 10;
2980
142
    // Each full pass emits the 2nd and 3rd digit of the current group, then a
    // comma followed by the 1st digit of the next group.
    while (n) {
2981
127
        *--p = '0' + n % 10;
2982
127
        n /= 10;
2983
127
        if (n == 0) {
2984
25
            break;
2985
25
        }
2986
2987
102
        *--p = '0' + n % 10;
2988
102
        n /= 10;
2989
102
        if (n == 0) {
2990
16
            break;
2991
16
        }
2992
2993
86
        // A comma is only written here, where another digit is known to follow.
        *--p = ',';
2994
86
        *--p = '0' + n % 10;
2995
86
        n /= 10;
2996
        // For this unrolling, we check if n == 0 in the main while loop
2997
86
    }
2998
56
    // The sign is decided from the original value, not from n.
    if (i < 0) {
2999
20
        *--p = '-';
3000
20
    }
3001
56
    return p;
3002
56
}
_ZN5doris20SimpleItoaWithCommasIlEEPcT_S1_i
Line
Count
Source
2971
27
char* SimpleItoaWithCommas(T i, char* buffer, int32_t buffer_size) {
2972
27
    char* p = buffer + buffer_size;
2973
    // Need to use unsigned T instead of T to correctly handle
2974
27
    std::make_unsigned_t<T> n = i;
2975
27
    if (i < 0) {
2976
10
        n = 0 - n;
2977
10
    }
2978
27
    *--p = '0' + n % 10; // this case deals with the number "0"
2979
27
    n /= 10;
2980
68
    while (n) {
2981
63
        *--p = '0' + n % 10;
2982
63
        n /= 10;
2983
63
        if (n == 0) {
2984
17
            break;
2985
17
        }
2986
2987
46
        *--p = '0' + n % 10;
2988
46
        n /= 10;
2989
46
        if (n == 0) {
2990
5
            break;
2991
5
        }
2992
2993
41
        *--p = ',';
2994
41
        *--p = '0' + n % 10;
2995
41
        n /= 10;
2996
        // For this unrolling, we check if n == 0 in the main while loop
2997
41
    }
2998
27
    if (i < 0) {
2999
10
        *--p = '-';
3000
10
    }
3001
27
    return p;
3002
27
}
_ZN5doris20SimpleItoaWithCommasInEEPcT_S1_i
Line
Count
Source
2971
29
char* SimpleItoaWithCommas(T i, char* buffer, int32_t buffer_size) {
2972
29
    char* p = buffer + buffer_size;
2973
    // Need to use unsigned T instead of T to correctly handle
2974
29
    std::make_unsigned_t<T> n = i;
2975
29
    if (i < 0) {
2976
10
        n = 0 - n;
2977
10
    }
2978
29
    *--p = '0' + n % 10; // this case deals with the number "0"
2979
29
    n /= 10;
2980
74
    while (n) {
2981
64
        *--p = '0' + n % 10;
2982
64
        n /= 10;
2983
64
        if (n == 0) {
2984
8
            break;
2985
8
        }
2986
2987
56
        *--p = '0' + n % 10;
2988
56
        n /= 10;
2989
56
        if (n == 0) {
2990
11
            break;
2991
11
        }
2992
2993
45
        *--p = ',';
2994
45
        *--p = '0' + n % 10;
2995
45
        n /= 10;
2996
        // For this unrolling, we check if n == 0 in the main while loop
2997
45
    }
2998
29
    if (i < 0) {
2999
10
        *--p = '-';
3000
10
    }
3001
29
    return p;
3002
29
}
3003
3004
namespace MoneyFormat {
3005
3006
0
// Scratch-buffer size for formatting a 32-bit decimal.
// Presumably budgets 1 sign + 9 digits + 9/3 separators + 3 for ".dd" -- TODO confirm.
constexpr size_t MAX_FORMAT_LEN_DEC32() {
3007
0
    // Decimal(9, 0)
3008
0
    // Double the size to avoid some unexpected bug.
3009
0
    return 2 * (1 + 9 + (9 / 3) + 3);
3010
0
}
3011
3012
0
// Scratch-buffer size for formatting a 64-bit decimal.
// Presumably budgets 1 sign + 18 digits + 18/3 separators + 3 for ".dd" -- TODO confirm.
constexpr size_t MAX_FORMAT_LEN_DEC64() {
3013
0
    // Decimal(18, 0)
3014
0
    // Double the size to avoid some unexpected bug.
3015
0
    return 2 * (1 + 18 + (18 / 3) + 3);
3016
0
}
3017
3018
0
// Scratch-buffer size for formatting a DecimalV2 value.
// Presumably budgets 1 sign + 27 digits + 27/3 separators + 3 for ".dd" -- TODO confirm.
constexpr size_t MAX_FORMAT_LEN_DEC128V2() {
3019
0
    // DecimalV2 has at most 27 digits
3020
0
    // Double the size to avoid some unexpected bug.
3021
0
    return 2 * (1 + 27 + (27 / 3) + 3);
3022
0
}
3023
3024
0
// Scratch-buffer size for formatting a 128-bit decimal.
// NOTE(review): the comment below cites precision 38 while the formula budgets
// 39 digits; the result is only larger than needed, but confirm which is intended.
constexpr size_t MAX_FORMAT_LEN_DEC128V3() {
3025
0
    // Decimal(38, 0)
3026
0
    // Double the size to avoid some unexpected bug.
3027
0
    return 2 * (1 + 39 + (39 / 3) + 3);
3028
0
}
3029
3030
0
// Scratch-buffer size for formatting a 64-bit integer.
constexpr size_t MAX_FORMAT_LEN_INT64() {
3031
0
    // INT64_MIN = -9223372036854775808 (19 digits; the formula budgets 20)
3032
0
    // Double the size to avoid some unexpected bug.
3033
0
    return 2 * (1 + 20 + (20 / 3) + 3);
3034
0
}
3035
3036
0
// Scratch-buffer size for formatting a 128-bit integer.
constexpr size_t MAX_FORMAT_LEN_INT128() {
3037
0
    // INT128_MIN = -170141183460469231731687303715884105728 (39 digits)
    // The size is doubled, as in the other MAX_FORMAT_LEN_* helpers.
3038
0
    return 2 * (1 + 39 + (39 / 3) + 3);
3039
0
}
3040
3041
// Formats a number given as separate integer and fractional parts into text of the
// form [-]d,ddd,ddd.dd: comma-grouped integer digits followed by exactly two
// fractional digits.
//
//   T          integral type holding both parts (enforced by the static_assert).
//   N          size in bytes of the on-stack buffer used to render the integer part.
//   context    provides the output storage via create_temp_string_val().
//   scale      number of decimal digits that frac_value represents.
//   int_value  integer part; may be negative.
//   frac_value fractional part as an integer; may be negative.
//
// Returns a StringRef to the storage obtained from `context`.
// NOTE(review): the lifetime of that storage is decided by FunctionContext, which is
// not visible here -- confirm before holding on to the result.
template <typename T, size_t N>
3042
25
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
3043
25
    static_assert(std::is_integral<T>::value);
3044
25
    // Captured before rounding, because the scale > 2 branch makes frac_value non-negative.
    const bool is_negative = int_value < 0 || frac_value < 0;
3045
3046
    // round frac_value to two digits
3047
    // magic number 2: since we need to round frac_part to 2 digits
3048
25
    if (scale > 2) {
3049
19
        DCHECK(scale <= 38);
3050
        // do rounding, so we need to reserve 3 digits.
3051
19
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
3052
        // divide first to avoid overflow
3053
        // after rounding, frac_value will be positive by design.
3054
19
        // Keep three digits, add 5, then drop the last digit: rounds half away from zero.
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3055
19
        frac_value /= 10;
3056
19
    } else if (scale < 2) {
3057
6
        DCHECK(frac_value < 100);
3058
        // since scale < 2 here, overflow is impossible
3059
6
        frac_value = frac_value * common::exp10_i32(2 - scale);
3060
6
    }
3061
3062
25
    // Rounding produced a carry (e.g. .995 -> 1.00): move it into the integer part,
    // away from zero.
    if (frac_value == 100) {
3063
3
        if (is_negative) {
3064
2
            int_value -= 1;
3065
2
        } else {
3066
1
            int_value += 1;
3067
1
        }
3068
3
        frac_value = 0;
3069
3
    }
3070
3071
25
    bool append_sign_manually = false;
3072
25
    if (is_negative && int_value == 0) {
3073
        // when int_value is 0, the result of SimpleItoaWithCommas contains just zero
3074
        // for a Decimal like -0.1234 this is a problem, because the negative sign is discarded.
3075
        // this is why we introduce the flag append_sign_manually.
3076
2
        append_sign_manually = true;
3077
2
    }
3078
3079
25
    char local[N];
3080
25
    // The integer text is right-aligned in `local`; p points at its first character.
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3081
25
    const Int32 integer_str_len = N - (p - local);
3082
25
    const Int32 frac_str_len = 2;
3083
25
    // optional '-' + integer text + '.' + two fractional digits
    const Int32 whole_decimal_str_len =
3084
25
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
3085
3086
25
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3087
    // Modify a string passed via stringref
3088
25
    char* result_data = const_cast<char*>(result.data);
3089
3090
25
    if (append_sign_manually) {
3091
2
        memset(result_data, '-', 1);
3092
2
    }
3093
3094
25
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3095
25
    *(result_data + whole_decimal_str_len - 3) = '.';
3096
25
    // std::abs: frac_value can still be negative here when scale <= 2.
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3097
25
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3098
25
    return result;
3099
25
};
_ZN5doris11MoneyFormat15do_money_formatIlLm60EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
3042
3
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
3043
3
    static_assert(std::is_integral<T>::value);
3044
3
    const bool is_negative = int_value < 0 || frac_value < 0;
3045
3046
    // do round to frac_part
3047
    // magic number 2: since we need to round frac_part to 2 digits
3048
3
    if (scale > 2) {
3049
0
        DCHECK(scale <= 38);
3050
        // do rounding, so we need to reserve 3 digits.
3051
0
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
3052
        // do devide first to avoid overflow
3053
        // after round frac_value will be positive by design.
3054
0
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3055
0
        frac_value /= 10;
3056
3
    } else if (scale < 2) {
3057
3
        DCHECK(frac_value < 100);
3058
        // since scale <= 2, overflow is impossiable
3059
3
        frac_value = frac_value * common::exp10_i32(2 - scale);
3060
3
    }
3061
3062
3
    if (frac_value == 100) {
3063
0
        if (is_negative) {
3064
0
            int_value -= 1;
3065
0
        } else {
3066
0
            int_value += 1;
3067
0
        }
3068
0
        frac_value = 0;
3069
0
    }
3070
3071
3
    bool append_sign_manually = false;
3072
3
    if (is_negative && int_value == 0) {
3073
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
3074
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
3075
        // this is why we introduce argument append_sing_manually.
3076
0
        append_sign_manually = true;
3077
0
    }
3078
3079
3
    char local[N];
3080
3
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3081
3
    const Int32 integer_str_len = N - (p - local);
3082
3
    const Int32 frac_str_len = 2;
3083
3
    const Int32 whole_decimal_str_len =
3084
3
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
3085
3086
3
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3087
    // Modify a string passed via stringref
3088
3
    char* result_data = const_cast<char*>(result.data);
3089
3090
3
    if (append_sign_manually) {
3091
0
        memset(result_data, '-', 1);
3092
0
    }
3093
3094
3
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3095
3
    *(result_data + whole_decimal_str_len - 3) = '.';
3096
3
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3097
3
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3098
3
    return result;
3099
3
};
_ZN5doris11MoneyFormat15do_money_formatInLm112EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
3042
3
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
3043
3
    static_assert(std::is_integral<T>::value);
3044
3
    const bool is_negative = int_value < 0 || frac_value < 0;
3045
3046
    // do round to frac_part
3047
    // magic number 2: since we need to round frac_part to 2 digits
3048
3
    if (scale > 2) {
3049
0
        DCHECK(scale <= 38);
3050
        // do rounding, so we need to reserve 3 digits.
3051
0
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
3052
        // do devide first to avoid overflow
3053
        // after round frac_value will be positive by design.
3054
0
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3055
0
        frac_value /= 10;
3056
3
    } else if (scale < 2) {
3057
3
        DCHECK(frac_value < 100);
3058
        // since scale <= 2, overflow is impossiable
3059
3
        frac_value = frac_value * common::exp10_i32(2 - scale);
3060
3
    }
3061
3062
3
    if (frac_value == 100) {
3063
0
        if (is_negative) {
3064
0
            int_value -= 1;
3065
0
        } else {
3066
0
            int_value += 1;
3067
0
        }
3068
0
        frac_value = 0;
3069
0
    }
3070
3071
3
    bool append_sign_manually = false;
3072
3
    if (is_negative && int_value == 0) {
3073
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
3074
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
3075
        // this is why we introduce argument append_sing_manually.
3076
0
        append_sign_manually = true;
3077
0
    }
3078
3079
3
    char local[N];
3080
3
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3081
3
    const Int32 integer_str_len = N - (p - local);
3082
3
    const Int32 frac_str_len = 2;
3083
3
    const Int32 whole_decimal_str_len =
3084
3
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
3085
3086
3
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3087
    // Modify a string passed via stringref
3088
3
    char* result_data = const_cast<char*>(result.data);
3089
3090
3
    if (append_sign_manually) {
3091
0
        memset(result_data, '-', 1);
3092
0
    }
3093
3094
3
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3095
3
    *(result_data + whole_decimal_str_len - 3) = '.';
3096
3
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3097
3
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3098
3
    return result;
3099
3
};
_ZN5doris11MoneyFormat15do_money_formatInLm80EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
3042
14
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
3043
14
    static_assert(std::is_integral<T>::value);
3044
14
    const bool is_negative = int_value < 0 || frac_value < 0;
3045
3046
    // do round to frac_part
3047
    // magic number 2: since we need to round frac_part to 2 digits
3048
14
    if (scale > 2) {
3049
14
        DCHECK(scale <= 38);
3050
        // do rounding, so we need to reserve 3 digits.
3051
14
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
3052
        // do devide first to avoid overflow
3053
        // after round frac_value will be positive by design.
3054
14
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3055
14
        frac_value /= 10;
3056
14
    } else if (scale < 2) {
3057
0
        DCHECK(frac_value < 100);
3058
        // since scale <= 2, overflow is impossiable
3059
0
        frac_value = frac_value * common::exp10_i32(2 - scale);
3060
0
    }
3061
3062
14
    if (frac_value == 100) {
3063
3
        if (is_negative) {
3064
2
            int_value -= 1;
3065
2
        } else {
3066
1
            int_value += 1;
3067
1
        }
3068
3
        frac_value = 0;
3069
3
    }
3070
3071
14
    bool append_sign_manually = false;
3072
14
    if (is_negative && int_value == 0) {
3073
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
3074
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
3075
        // this is why we introduce argument append_sing_manually.
3076
2
        append_sign_manually = true;
3077
2
    }
3078
3079
14
    char local[N];
3080
14
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3081
14
    const Int32 integer_str_len = N - (p - local);
3082
14
    const Int32 frac_str_len = 2;
3083
14
    const Int32 whole_decimal_str_len =
3084
14
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
3085
3086
14
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3087
    // Modify a string passed via stringref
3088
14
    char* result_data = const_cast<char*>(result.data);
3089
3090
14
    if (append_sign_manually) {
3091
2
        memset(result_data, '-', 1);
3092
2
    }
3093
3094
14
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3095
14
    *(result_data + whole_decimal_str_len - 3) = '.';
3096
14
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3097
14
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3098
14
    return result;
3099
14
};
Unexecuted instantiation: _ZN5doris11MoneyFormat15do_money_formatIlLm32EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
_ZN5doris11MoneyFormat15do_money_formatIlLm56EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
3042
5
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
3043
5
    static_assert(std::is_integral<T>::value);
3044
5
    const bool is_negative = int_value < 0 || frac_value < 0;
3045
3046
    // do round to frac_part
3047
    // magic number 2: since we need to round frac_part to 2 digits
3048
5
    if (scale > 2) {
3049
5
        DCHECK(scale <= 38);
3050
        // do rounding, so we need to reserve 3 digits.
3051
5
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
3052
        // do devide first to avoid overflow
3053
        // after round frac_value will be positive by design.
3054
5
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3055
5
        frac_value /= 10;
3056
5
    } else if (scale < 2) {
3057
0
        DCHECK(frac_value < 100);
3058
        // since scale <= 2, overflow is impossiable
3059
0
        frac_value = frac_value * common::exp10_i32(2 - scale);
3060
0
    }
3061
3062
5
    if (frac_value == 100) {
3063
0
        if (is_negative) {
3064
0
            int_value -= 1;
3065
0
        } else {
3066
0
            int_value += 1;
3067
0
        }
3068
0
        frac_value = 0;
3069
0
    }
3070
3071
5
    bool append_sign_manually = false;
3072
5
    if (is_negative && int_value == 0) {
3073
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
3074
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
3075
        // this is why we introduce argument append_sing_manually.
3076
0
        append_sign_manually = true;
3077
0
    }
3078
3079
5
    char local[N];
3080
5
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3081
5
    const Int32 integer_str_len = N - (p - local);
3082
5
    const Int32 frac_str_len = 2;
3083
5
    const Int32 whole_decimal_str_len =
3084
5
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
3085
3086
5
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3087
    // Modify a string passed via stringref
3088
5
    char* result_data = const_cast<char*>(result.data);
3089
3090
5
    if (append_sign_manually) {
3091
0
        memset(result_data, '-', 1);
3092
0
    }
3093
3094
5
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3095
5
    *(result_data + whole_decimal_str_len - 3) = '.';
3096
5
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3097
5
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3098
5
    return result;
3099
5
};
3100
3101
// Note string value must be valid decimal string which contains two digits after the decimal point
3102
4
static StringRef do_money_format(FunctionContext* context, const std::string& value) {
3103
4
    bool is_positive = (value[0] != '-');
3104
4
    int32_t result_len = value.size() + (value.size() - (is_positive ? 4 : 5)) / 3;
3105
4
    StringRef result = context->create_temp_string_val(result_len);
3106
    // Modify a string passed via stringref
3107
4
    char* result_data = const_cast<char*>(result.data);
3108
4
    if (!is_positive) {
3109
2
        *result_data = '-';
3110
2
    }
3111
10
    for (int i = value.size() - 4, j = result_len - 4; i >= 0; i = i - 3) {
3112
9
        *(result_data + j) = *(value.data() + i);
3113
9
        if (i - 1 < 0) {
3114
2
            break;
3115
2
        }
3116
7
        *(result_data + j - 1) = *(value.data() + i - 1);
3117
7
        if (i - 2 < 0) {
3118
1
            break;
3119
1
        }
3120
6
        *(result_data + j - 2) = *(value.data() + i - 2);
3121
6
        if (j - 3 > 1 || (j - 3 == 1 && is_positive)) {
3122
4
            *(result_data + j - 3) = ',';
3123
4
            j -= 4;
3124
4
        } else {
3125
2
            j -= 3;
3126
2
        }
3127
6
    }
3128
4
    memcpy(result_data + result_len - 3, value.data() + value.size() - 3, 3);
3129
4
    return result;
3130
4
};
Unexecuted instantiation: column_string_test.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: partition_transformers_test.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: function_money_format_test.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: function_sub_replace_test.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: iceberg_table_sink_operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: spill_iceberg_table_sink_operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: pipeline_fragment_context.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: partition_transformers.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: viceberg_table_writer.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: function_split_by_regexp.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
function_string.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
3102
4
// Inserts thousands separators into a decimal string that carries exactly two digits after
// the decimal point, e.g. "-1234567.80" -> "-1,234,567.80".
//
// @param context  used to allocate the returned temporary string
// @param value    text of the form "[-]<digits>.<dd>"
// @return the grouped text. Input that does not have that shape (for example the "nan" /
//         "inf" / "-inf" texts a "{:.2f}" format produces for non-finite doubles) is copied
//         through unchanged; previously such input made the unsigned length arithmetic
//         wrap around and request a bogus, enormous result buffer.
static StringRef do_money_format(FunctionContext* context, const std::string& value) {
    constexpr size_t tail_len = 3; // ".dd"
    const size_t sign_len = (!value.empty() && value[0] == '-') ? 1 : 0;

    // Shape guard: at least one integer digit and a '.' three characters from the end.
    if (value.size() < sign_len + 1 + tail_len || value[value.size() - tail_len] != '.') {
        StringRef verbatim = context->create_temp_string_val(static_cast<int32_t>(value.size()));
        if (!value.empty()) {
            memcpy(const_cast<char*>(verbatim.data), value.data(), value.size());
        }
        return verbatim;
    }

    const size_t int_digits = value.size() - sign_len - tail_len;
    // One separator in front of every complete group of three digits except the leftmost.
    const size_t separators = (int_digits - 1) / 3;
    const auto result_len = static_cast<int32_t>(value.size() + separators);

    StringRef result = context->create_temp_string_val(result_len);
    // Modify a string passed via stringref
    char* dst = const_cast<char*>(result.data) + result_len;
    const char* src = value.data() + value.size();

    // Fill right to left: first the ".dd" tail ...
    dst -= tail_len;
    src -= tail_len;
    memcpy(dst, src, tail_len);

    // ... then the integer digits, dropping a ',' after every third one ...
    for (size_t copied = 0; copied < int_digits; ++copied) {
        if (copied != 0 && copied % 3 == 0) {
            *--dst = ',';
        }
        *--dst = *--src;
    }

    // ... and finally the sign, which lands on the first byte of the buffer.
    if (sign_len != 0) {
        *--dst = '-';
    }
    return result;
}
3131
3132
} // namespace MoneyFormat
3133
3134
namespace FormatRound {
3135
3136
0
// Scratch-buffer size used when formatting a Decimal(9, 0) value.
// Computed as 1 + digits + digits / 3 + 3 (presumably sign, digits, thousands separators
// and a short tail -- confirm), then doubled as a safety margin.
constexpr size_t MAX_FORMAT_LEN_DEC32() {
    constexpr size_t digits = 9;
    constexpr size_t base_len = 1 + digits + digits / 3 + 3;
    return base_len * 2;
}
3141
3142
0
// Scratch-buffer size used when formatting a Decimal(18, 0) value.
// Computed as 1 + digits + digits / 3 + 3 (presumably sign, digits, thousands separators
// and a short tail -- confirm), then doubled as a safety margin.
constexpr size_t MAX_FORMAT_LEN_DEC64() {
    constexpr size_t digits = 18;
    constexpr size_t base_len = 1 + digits + digits / 3 + 3;
    return base_len * 2;
}
3147
3148
0
// Scratch-buffer size used when formatting a DecimalV2 value (at most 27 digits).
// Computed as 1 + digits + digits / 3 + 3 (presumably sign, digits, thousands separators
// and a short tail -- confirm), then doubled as a safety margin.
constexpr size_t MAX_FORMAT_LEN_DEC128V2() {
    constexpr size_t digits = 27;
    constexpr size_t base_len = 1 + digits + digits / 3 + 3;
    return base_len * 2;
}
3153
3154
0
// Scratch-buffer size used when formatting a Decimal(38, 0) value.
// Note that the length is computed for 39 digits, one more than the stated precision.
// Computed as 1 + digits + digits / 3 + 3 (presumably sign, digits, thousands separators
// and a short tail -- confirm), then doubled as a safety margin.
constexpr size_t MAX_FORMAT_LEN_DEC128V3() {
    constexpr size_t digits = 39;
    constexpr size_t base_len = 1 + digits + digits / 3 + 3;
    return base_len * 2;
}
3159
3160
0
// Scratch-buffer size used when formatting a 64-bit integer.
// INT64_MIN = -9223372036854775808 has 19 digits; the formula budgets 20.
// Computed as 1 + digits + digits / 3 + 3 (presumably sign, digits, thousands separators
// and a short tail -- confirm), then doubled as a safety margin.
constexpr size_t MAX_FORMAT_LEN_INT64() {
    constexpr size_t digits = 20;
    constexpr size_t base_len = 1 + digits + digits / 3 + 3;
    return base_len * 2;
}
3165
3166
0
// Scratch-buffer size used when formatting a 128-bit integer.
// INT128_MIN = -170141183460469231731687303715884105728 (39 digits).
// Computed as 1 + digits + digits / 3 + 3 (presumably sign, digits, thousands separators
// and a short tail -- confirm), then doubled.
constexpr size_t MAX_FORMAT_LEN_INT128() {
    constexpr size_t digits = 39;
    constexpr size_t base_len = 1 + digits + digits / 3 + 3;
    return base_len * 2;
}
3170
3171
template <typename T, size_t N>
3172
StringRef do_format_round(FunctionContext* context, UInt32 scale, T int_value, T frac_value,
3173
31
                          Int32 decimal_places) {
3174
31
    static_assert(std::is_integral<T>::value);
3175
31
    const bool is_negative = int_value < 0 || frac_value < 0;
3176
3177
    // do round to frac_part based on decimal_places
3178
31
    if (scale > decimal_places && decimal_places > 0) {
3179
14
        DCHECK(scale <= 38);
3180
        // do rounding, so we need to reserve decimal_places + 1 digits
3181
14
        auto multiplier =
3182
14
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3183
        // do divide first to avoid overflow
3184
        // after round frac_value will be positive by design
3185
14
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3186
14
        frac_value /= 10;
3187
17
    } else if (scale < decimal_places && decimal_places > 0) {
3188
        // since scale <= decimal_places, overflow is impossible
3189
13
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3190
13
    }
3191
3192
    // Calculate power of 10 for decimal_places
3193
31
    T decimal_power = common::exp10_i32(decimal_places);
3194
31
    if (frac_value == decimal_power) {
3195
0
        if (is_negative) {
3196
0
            int_value -= 1;
3197
0
        } else {
3198
0
            int_value += 1;
3199
0
        }
3200
0
        frac_value = 0;
3201
0
    }
3202
3203
31
    bool append_sign_manually = false;
3204
31
    if (is_negative && int_value == 0) {
3205
0
        append_sign_manually = true;
3206
0
    }
3207
3208
31
    char local[N];
3209
31
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3210
31
    const Int32 integer_str_len = N - (p - local);
3211
31
    const Int32 frac_str_len = decimal_places;
3212
31
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3213
31
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3214
3215
31
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3216
    // Modify a string passed via stringref
3217
31
    char* result_data = const_cast<char*>(result.data);
3218
3219
31
    if (append_sign_manually) {
3220
0
        memset(result_data, '-', 1);
3221
0
    }
3222
3223
31
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3224
31
    if (decimal_places > 0) {
3225
27
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3226
27
    }
3227
3228
    // Convert fractional part to string with proper padding
3229
31
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3230
139
    for (int i = 0; i <= decimal_places - 1; ++i) {
3231
108
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3232
108
        remaining_frac /= 10;
3233
108
    }
3234
31
    return result;
3235
31
}
_ZN5doris11FormatRound15do_format_roundIlLm60EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3173
8
                          Int32 decimal_places) {
3174
8
    static_assert(std::is_integral<T>::value);
3175
8
    const bool is_negative = int_value < 0 || frac_value < 0;
3176
3177
    // do round to frac_part based on decimal_places
3178
8
    if (scale > decimal_places && decimal_places > 0) {
3179
0
        DCHECK(scale <= 38);
3180
        // do rounding, so we need to reserve decimal_places + 1 digits
3181
0
        auto multiplier =
3182
0
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3183
        // do divide first to avoid overflow
3184
        // after round frac_value will be positive by design
3185
0
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3186
0
        frac_value /= 10;
3187
8
    } else if (scale < decimal_places && decimal_places > 0) {
3188
        // since scale <= decimal_places, overflow is impossible
3189
6
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3190
6
    }
3191
3192
    // Calculate power of 10 for decimal_places
3193
8
    T decimal_power = common::exp10_i32(decimal_places);
3194
8
    if (frac_value == decimal_power) {
3195
0
        if (is_negative) {
3196
0
            int_value -= 1;
3197
0
        } else {
3198
0
            int_value += 1;
3199
0
        }
3200
0
        frac_value = 0;
3201
0
    }
3202
3203
8
    bool append_sign_manually = false;
3204
8
    if (is_negative && int_value == 0) {
3205
0
        append_sign_manually = true;
3206
0
    }
3207
3208
8
    char local[N];
3209
8
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3210
8
    const Int32 integer_str_len = N - (p - local);
3211
8
    const Int32 frac_str_len = decimal_places;
3212
8
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3213
8
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3214
3215
8
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3216
    // Modify a string passed via stringref
3217
8
    char* result_data = const_cast<char*>(result.data);
3218
3219
8
    if (append_sign_manually) {
3220
0
        memset(result_data, '-', 1);
3221
0
    }
3222
3223
8
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3224
8
    if (decimal_places > 0) {
3225
6
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3226
6
    }
3227
3228
    // Convert fractional part to string with proper padding
3229
8
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3230
47
    for (int i = 0; i <= decimal_places - 1; ++i) {
3231
39
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3232
39
        remaining_frac /= 10;
3233
39
    }
3234
8
    return result;
3235
8
}
_ZN5doris11FormatRound15do_format_roundInLm112EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3173
9
                          Int32 decimal_places) {
3174
9
    static_assert(std::is_integral<T>::value);
3175
9
    const bool is_negative = int_value < 0 || frac_value < 0;
3176
3177
    // do round to frac_part based on decimal_places
3178
9
    if (scale > decimal_places && decimal_places > 0) {
3179
0
        DCHECK(scale <= 38);
3180
        // do rounding, so we need to reserve decimal_places + 1 digits
3181
0
        auto multiplier =
3182
0
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3183
        // do divide first to avoid overflow
3184
        // after round frac_value will be positive by design
3185
0
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3186
0
        frac_value /= 10;
3187
9
    } else if (scale < decimal_places && decimal_places > 0) {
3188
        // since scale <= decimal_places, overflow is impossible
3189
7
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3190
7
    }
3191
3192
    // Calculate power of 10 for decimal_places
3193
9
    T decimal_power = common::exp10_i32(decimal_places);
3194
9
    if (frac_value == decimal_power) {
3195
0
        if (is_negative) {
3196
0
            int_value -= 1;
3197
0
        } else {
3198
0
            int_value += 1;
3199
0
        }
3200
0
        frac_value = 0;
3201
0
    }
3202
3203
9
    bool append_sign_manually = false;
3204
9
    if (is_negative && int_value == 0) {
3205
0
        append_sign_manually = true;
3206
0
    }
3207
3208
9
    char local[N];
3209
9
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3210
9
    const Int32 integer_str_len = N - (p - local);
3211
9
    const Int32 frac_str_len = decimal_places;
3212
9
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3213
9
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3214
3215
9
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3216
    // Modify a string passed via stringref
3217
9
    char* result_data = const_cast<char*>(result.data);
3218
3219
9
    if (append_sign_manually) {
3220
0
        memset(result_data, '-', 1);
3221
0
    }
3222
3223
9
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3224
9
    if (decimal_places > 0) {
3225
7
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3226
7
    }
3227
3228
    // Convert fractional part to string with proper padding
3229
9
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3230
50
    for (int i = 0; i <= decimal_places - 1; ++i) {
3231
41
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3232
41
        remaining_frac /= 10;
3233
41
    }
3234
9
    return result;
3235
9
}
_ZN5doris11FormatRound15do_format_roundInLm80EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3173
3
                          Int32 decimal_places) {
3174
3
    static_assert(std::is_integral<T>::value);
3175
3
    const bool is_negative = int_value < 0 || frac_value < 0;
3176
3177
    // do round to frac_part based on decimal_places
3178
3
    if (scale > decimal_places && decimal_places > 0) {
3179
3
        DCHECK(scale <= 38);
3180
        // do rounding, so we need to reserve decimal_places + 1 digits
3181
3
        auto multiplier =
3182
3
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3183
        // do divide first to avoid overflow
3184
        // after round frac_value will be positive by design
3185
3
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3186
3
        frac_value /= 10;
3187
3
    } else if (scale < decimal_places && decimal_places > 0) {
3188
        // since scale <= decimal_places, overflow is impossible
3189
0
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3190
0
    }
3191
3192
    // Calculate power of 10 for decimal_places
3193
3
    T decimal_power = common::exp10_i32(decimal_places);
3194
3
    if (frac_value == decimal_power) {
3195
0
        if (is_negative) {
3196
0
            int_value -= 1;
3197
0
        } else {
3198
0
            int_value += 1;
3199
0
        }
3200
0
        frac_value = 0;
3201
0
    }
3202
3203
3
    bool append_sign_manually = false;
3204
3
    if (is_negative && int_value == 0) {
3205
0
        append_sign_manually = true;
3206
0
    }
3207
3208
3
    char local[N];
3209
3
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3210
3
    const Int32 integer_str_len = N - (p - local);
3211
3
    const Int32 frac_str_len = decimal_places;
3212
3
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3213
3
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3214
3215
3
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3216
    // Modify a string passed via stringref
3217
3
    char* result_data = const_cast<char*>(result.data);
3218
3219
3
    if (append_sign_manually) {
3220
0
        memset(result_data, '-', 1);
3221
0
    }
3222
3223
3
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3224
3
    if (decimal_places > 0) {
3225
3
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3226
3
    }
3227
3228
    // Convert fractional part to string with proper padding
3229
3
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3230
9
    for (int i = 0; i <= decimal_places - 1; ++i) {
3231
6
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3232
6
        remaining_frac /= 10;
3233
6
    }
3234
3
    return result;
3235
3
}
Unexecuted instantiation: _ZN5doris11FormatRound15do_format_roundIlLm32EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
_ZN5doris11FormatRound15do_format_roundIlLm56EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3173
11
                          Int32 decimal_places) {
3174
11
    static_assert(std::is_integral<T>::value);
3175
11
    const bool is_negative = int_value < 0 || frac_value < 0;
3176
3177
    // do round to frac_part based on decimal_places
3178
11
    if (scale > decimal_places && decimal_places > 0) {
3179
11
        DCHECK(scale <= 38);
3180
        // do rounding, so we need to reserve decimal_places + 1 digits
3181
11
        auto multiplier =
3182
11
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3183
        // do divide first to avoid overflow
3184
        // after round frac_value will be positive by design
3185
11
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3186
11
        frac_value /= 10;
3187
11
    } else if (scale < decimal_places && decimal_places > 0) {
3188
        // since scale <= decimal_places, overflow is impossible
3189
0
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3190
0
    }
3191
3192
    // Calculate power of 10 for decimal_places
3193
11
    T decimal_power = common::exp10_i32(decimal_places);
3194
11
    if (frac_value == decimal_power) {
3195
0
        if (is_negative) {
3196
0
            int_value -= 1;
3197
0
        } else {
3198
0
            int_value += 1;
3199
0
        }
3200
0
        frac_value = 0;
3201
0
    }
3202
3203
11
    bool append_sign_manually = false;
3204
11
    if (is_negative && int_value == 0) {
3205
0
        append_sign_manually = true;
3206
0
    }
3207
3208
11
    char local[N];
3209
11
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3210
11
    const Int32 integer_str_len = N - (p - local);
3211
11
    const Int32 frac_str_len = decimal_places;
3212
11
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3213
11
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3214
3215
11
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3216
    // Modify a string passed via stringref
3217
11
    char* result_data = const_cast<char*>(result.data);
3218
3219
11
    if (append_sign_manually) {
3220
0
        memset(result_data, '-', 1);
3221
0
    }
3222
3223
11
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3224
11
    if (decimal_places > 0) {
3225
11
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3226
11
    }
3227
3228
    // Convert fractional part to string with proper padding
3229
11
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3230
33
    for (int i = 0; i <= decimal_places - 1; ++i) {
3231
22
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3232
22
        remaining_frac /= 10;
3233
22
    }
3234
11
    return result;
3235
11
}
3236
3237
} // namespace FormatRound
3238
3239
struct MoneyFormatDoubleImpl {
3240
1
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeFloat64>()}; }
3241
3242
    static void execute(FunctionContext* context, ColumnString* result_column,
3243
1
                        const ColumnPtr col_ptr, size_t input_rows_count) {
3244
1
        const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get());
3245
        // when scale is above 38, we will go here
3246
5
        for (size_t i = 0; i < input_rows_count; i++) {
3247
            // round to 2 decimal places
3248
4
            double value =
3249
4
                    MathFunctions::my_double_round(data_column->get_element(i), 2, false, false);
3250
4
            StringRef str = MoneyFormat::do_money_format(context, fmt::format("{:.2f}", value));
3251
4
            result_column->insert_data(str.data, str.size);
3252
4
        }
3253
1
    }
3254
};
3255
3256
struct MoneyFormatInt64Impl {
3257
1
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt64>()}; }
3258
3259
    static void execute(FunctionContext* context, ColumnString* result_column,
3260
1
                        const ColumnPtr col_ptr, size_t input_rows_count) {
3261
1
        const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get());
3262
4
        for (size_t i = 0; i < input_rows_count; i++) {
3263
3
            Int64 value = data_column->get_element(i);
3264
3
            StringRef str =
3265
3
                    MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_INT64()>(
3266
3
                            context, 0, value, 0);
3267
3
            result_column->insert_data(str.data, str.size);
3268
3
        }
3269
1
    }
3270
};
3271
3272
struct MoneyFormatInt128Impl {
3273
1
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt128>()}; }
3274
3275
    static void execute(FunctionContext* context, ColumnString* result_column,
3276
1
                        const ColumnPtr col_ptr, size_t input_rows_count) {
3277
1
        const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get());
3278
        // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will
3279
        // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris,
3280
        // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124
3281
4
        for (size_t i = 0; i < input_rows_count; i++) {
3282
3
            Int128 value = data_column->get_element(i);
3283
3
            StringRef str =
3284
3
                    MoneyFormat::do_money_format<Int128, MoneyFormat::MAX_FORMAT_LEN_INT128()>(
3285
3
                            context, 0, value, 0);
3286
3
            result_column->insert_data(str.data, str.size);
3287
3
        }
3288
1
    }
3289
};
3290
3291
template <PrimitiveType Type>
3292
struct MoneyFormatDecimalImpl {
3293
5
    // The single accepted argument type: the data type PrimitiveTypeTraits maps `Type` to.
    static DataTypes get_variadic_argument_types() {
        using ArgumentType = typename PrimitiveTypeTraits<Type>::DataType;
        return {std::make_shared<ArgumentType>()};
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EE27get_variadic_argument_typesEv
Line
Count
Source
3293
1
    // The single accepted argument type: the data type PrimitiveTypeTraits maps `Type` to.
    static DataTypes get_variadic_argument_types() {
        using ArgumentType = typename PrimitiveTypeTraits<Type>::DataType;
        return {std::make_shared<ArgumentType>()};
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EE27get_variadic_argument_typesEv
Line
Count
Source
3293
1
    // The single accepted argument type: the data type PrimitiveTypeTraits maps `Type` to.
    static DataTypes get_variadic_argument_types() {
        using ArgumentType = typename PrimitiveTypeTraits<Type>::DataType;
        return {std::make_shared<ArgumentType>()};
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EE27get_variadic_argument_typesEv
Line
Count
Source
3293
1
    // The single accepted argument type: the data type PrimitiveTypeTraits maps `Type` to.
    static DataTypes get_variadic_argument_types() {
        using ArgumentType = typename PrimitiveTypeTraits<Type>::DataType;
        return {std::make_shared<ArgumentType>()};
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EE27get_variadic_argument_typesEv
Line
Count
Source
3293
1
    // The single accepted argument type: the data type PrimitiveTypeTraits maps `Type` to.
    static DataTypes get_variadic_argument_types() {
        using ArgumentType = typename PrimitiveTypeTraits<Type>::DataType;
        return {std::make_shared<ArgumentType>()};
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EE27get_variadic_argument_typesEv
Line
Count
Source
3293
1
    // The single accepted argument type: the data type PrimitiveTypeTraits maps `Type` to.
    static DataTypes get_variadic_argument_types() {
        using ArgumentType = typename PrimitiveTypeTraits<Type>::DataType;
        return {std::make_shared<ArgumentType>()};
    }
3296
3297
    static void execute(FunctionContext* context, ColumnString* result_column, ColumnPtr col_ptr,
3298
5
                        size_t input_rows_count) {
3299
5
        if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3300
16
            for (size_t i = 0; i < input_rows_count; i++) {
3301
14
                const auto& value = decimalv2_column->get_element(i);
3302
                // unified_frac_value has 3 digits
3303
14
                auto unified_frac_value = value.frac_value() / 1000000;
3304
14
                StringRef str =
3305
14
                        MoneyFormat::do_money_format<Int128,
3306
14
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>(
3307
14
                                context, 3, value.int_value(), unified_frac_value);
3308
3309
14
                result_column->insert_data(str.data, str.size);
3310
14
            }
3311
3
        } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3312
0
            const UInt32 scale = decimal32_column->get_scale();
3313
0
            for (size_t i = 0; i < input_rows_count; i++) {
3314
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3315
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3316
0
                StringRef str =
3317
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>(
3318
0
                                context, scale, static_cast<Int64>(whole_part),
3319
0
                                static_cast<Int64>(frac_part));
3320
3321
0
                result_column->insert_data(str.data, str.size);
3322
0
            }
3323
3
        } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3324
3
            const UInt32 scale = decimal64_column->get_scale();
3325
8
            for (size_t i = 0; i < input_rows_count; i++) {
3326
5
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3327
5
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3328
3329
5
                StringRef str =
3330
5
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>(
3331
5
                                context, scale, whole_part, frac_part);
3332
3333
5
                result_column->insert_data(str.data, str.size);
3334
5
            }
3335
3
        } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3336
0
            const UInt32 scale = decimal128_column->get_scale();
3337
0
            for (size_t i = 0; i < input_rows_count; i++) {
3338
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3339
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3340
3341
0
                StringRef str =
3342
0
                        MoneyFormat::do_money_format<Int128,
3343
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>(
3344
0
                                context, scale, whole_part, frac_part);
3345
3346
0
                result_column->insert_data(str.data, str.size);
3347
0
            }
3348
0
        } else {
3349
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
3350
0
                                   "Not supported input argument type {}", col_ptr->get_name());
3351
0
        }
3352
        // TODO: decimal256
3353
        /* else if (auto* decimal256_column =
3354
                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
3355
            const UInt32 scale = decimal256_column->get_scale();
3356
            const auto multiplier =
3357
                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
3358
            for (size_t i = 0; i < input_rows_count; i++) {
3359
                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
3360
                if (scale > 2) {
3361
                    int delta = ((frac_part % multiplier) << 1) > multiplier;
3362
                    frac_part = Decimal256(frac_part / multiplier + delta);
3363
                } else if (scale < 2) {
3364
                    frac_part = Decimal256(frac_part * multiplier);
3365
                }
3366
3367
                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
3368
                        context, decimal256_column->get_intergral_part(i), frac_part);
3369
3370
                result_column->insert_data(str.data, str.size);
3371
            }
3372
        }*/
3373
5
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
Line
Count
Source
3298
2
                        size_t input_rows_count) {
3299
2
        if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3300
16
            for (size_t i = 0; i < input_rows_count; i++) {
3301
14
                const auto& value = decimalv2_column->get_element(i);
3302
                // unified_frac_value has 3 digits
3303
14
                auto unified_frac_value = value.frac_value() / 1000000;
3304
14
                StringRef str =
3305
14
                        MoneyFormat::do_money_format<Int128,
3306
14
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>(
3307
14
                                context, 3, value.int_value(), unified_frac_value);
3308
3309
14
                result_column->insert_data(str.data, str.size);
3310
14
            }
3311
2
        } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3312
0
            const UInt32 scale = decimal32_column->get_scale();
3313
0
            for (size_t i = 0; i < input_rows_count; i++) {
3314
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3315
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3316
0
                StringRef str =
3317
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>(
3318
0
                                context, scale, static_cast<Int64>(whole_part),
3319
0
                                static_cast<Int64>(frac_part));
3320
3321
0
                result_column->insert_data(str.data, str.size);
3322
0
            }
3323
0
        } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3324
0
            const UInt32 scale = decimal64_column->get_scale();
3325
0
            for (size_t i = 0; i < input_rows_count; i++) {
3326
0
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3327
0
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3328
3329
0
                StringRef str =
3330
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>(
3331
0
                                context, scale, whole_part, frac_part);
3332
3333
0
                result_column->insert_data(str.data, str.size);
3334
0
            }
3335
0
        } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3336
0
            const UInt32 scale = decimal128_column->get_scale();
3337
0
            for (size_t i = 0; i < input_rows_count; i++) {
3338
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3339
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3340
3341
0
                StringRef str =
3342
0
                        MoneyFormat::do_money_format<Int128,
3343
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>(
3344
0
                                context, scale, whole_part, frac_part);
3345
3346
0
                result_column->insert_data(str.data, str.size);
3347
0
            }
3348
0
        } else {
3349
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
3350
0
                                   "Not supported input argument type {}", col_ptr->get_name());
3351
0
        }
3352
        // TODO: decimal256
3353
        /* else if (auto* decimal256_column =
3354
                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
3355
            const UInt32 scale = decimal256_column->get_scale();
3356
            const auto multiplier =
3357
                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
3358
            for (size_t i = 0; i < input_rows_count; i++) {
3359
                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
3360
                if (scale > 2) {
3361
                    int delta = ((frac_part % multiplier) << 1) > multiplier;
3362
                    frac_part = Decimal256(frac_part / multiplier + delta);
3363
                } else if (scale < 2) {
3364
                    frac_part = Decimal256(frac_part * multiplier);
3365
                }
3366
3367
                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
3368
                        context, decimal256_column->get_intergral_part(i), frac_part);
3369
3370
                result_column->insert_data(str.data, str.size);
3371
            }
3372
        }*/
3373
2
    }
Unexecuted instantiation: _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
Line
Count
Source
3298
3
                        size_t input_rows_count) {
3299
3
        if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3300
0
            for (size_t i = 0; i < input_rows_count; i++) {
3301
0
                const auto& value = decimalv2_column->get_element(i);
3302
                // unified_frac_value has 3 digits
3303
0
                auto unified_frac_value = value.frac_value() / 1000000;
3304
0
                StringRef str =
3305
0
                        MoneyFormat::do_money_format<Int128,
3306
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>(
3307
0
                                context, 3, value.int_value(), unified_frac_value);
3308
3309
0
                result_column->insert_data(str.data, str.size);
3310
0
            }
3311
3
        } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3312
0
            const UInt32 scale = decimal32_column->get_scale();
3313
0
            for (size_t i = 0; i < input_rows_count; i++) {
3314
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3315
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3316
0
                StringRef str =
3317
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>(
3318
0
                                context, scale, static_cast<Int64>(whole_part),
3319
0
                                static_cast<Int64>(frac_part));
3320
3321
0
                result_column->insert_data(str.data, str.size);
3322
0
            }
3323
3
        } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3324
3
            const UInt32 scale = decimal64_column->get_scale();
3325
8
            for (size_t i = 0; i < input_rows_count; i++) {
3326
5
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3327
5
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3328
3329
5
                StringRef str =
3330
5
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>(
3331
5
                                context, scale, whole_part, frac_part);
3332
3333
5
                result_column->insert_data(str.data, str.size);
3334
5
            }
3335
3
        } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3336
0
            const UInt32 scale = decimal128_column->get_scale();
3337
0
            for (size_t i = 0; i < input_rows_count; i++) {
3338
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3339
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3340
3341
0
                StringRef str =
3342
0
                        MoneyFormat::do_money_format<Int128,
3343
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>(
3344
0
                                context, scale, whole_part, frac_part);
3345
3346
0
                result_column->insert_data(str.data, str.size);
3347
0
            }
3348
0
        } else {
3349
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
3350
0
                                   "Not supported input argument type {}", col_ptr->get_name());
3351
0
        }
3352
        // TODO: decimal256
3353
        /* else if (auto* decimal256_column =
3354
                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
3355
            const UInt32 scale = decimal256_column->get_scale();
3356
            const auto multiplier =
3357
                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
3358
            for (size_t i = 0; i < input_rows_count; i++) {
3359
                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
3360
                if (scale > 2) {
3361
                    int delta = ((frac_part % multiplier) << 1) > multiplier;
3362
                    frac_part = Decimal256(frac_part / multiplier + delta);
3363
                } else if (scale < 2) {
3364
                    frac_part = Decimal256(frac_part * multiplier);
3365
                }
3366
3367
                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
3368
                        context, decimal256_column->get_intergral_part(i), frac_part);
3369
3370
                result_column->insert_data(str.data, str.size);
3371
            }
3372
        }*/
3373
3
    }
Unexecuted instantiation: _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
Unexecuted instantiation: _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
3374
};
3375
3376
struct FormatRoundDoubleImpl {
3377
1
    static DataTypes get_variadic_argument_types() {
3378
1
        return {std::make_shared<DataTypeFloat64>(), std::make_shared<DataTypeInt32>()};
3379
1
    }
3380
3381
4
    static std::string add_thousands_separator(const std::string& formatted_num) {
3382
        //  Find the position of the decimal point
3383
4
        size_t dot_pos = formatted_num.find('.');
3384
4
        if (dot_pos == std::string::npos) {
3385
0
            dot_pos = formatted_num.size();
3386
0
        }
3387
3388
        // Handle the integer part
3389
4
        int start = (formatted_num[0] == '-') ? 1 : 0;
3390
4
        int digit_count = dot_pos - start;
3391
3392
        // There is no need to add commas.
3393
4
        if (digit_count <= 3) {
3394
2
            return formatted_num;
3395
2
        }
3396
3397
2
        std::string result;
3398
3399
2
        if (start == 1) result += '-';
3400
3401
        // Add the integer part (with comma)
3402
2
        int first_group = digit_count % 3;
3403
2
        if (first_group == 0) first_group = 3;
3404
2
        result.append(formatted_num, start, first_group);
3405
3406
6
        for (size_t i = start + first_group; i < dot_pos; i += 3) {
3407
4
            result += ',';
3408
4
            result.append(formatted_num, i, 3);
3409
4
        }
3410
3411
        // Add the decimal part (keep as it is)
3412
2
        if (dot_pos != formatted_num.size()) {
3413
2
            result.append(formatted_num, dot_pos);
3414
2
        }
3415
3416
2
        return result;
3417
4
    }
3418
3419
    template <bool is_const>
3420
    static Status execute(FunctionContext* context, ColumnString* result_column,
3421
                          const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr,
3422
1
                          size_t input_rows_count) {
3423
1
        const auto& arg_column_data_2 =
3424
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3425
1
        const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get());
3426
        // when scale is above 38, we will go here
3427
5
        for (size_t i = 0; i < input_rows_count; i++) {
3428
4
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3429
4
            if (decimal_places < 0 || decimal_places > 1024) {
3430
0
                return Status::InvalidArgument(
3431
0
                        "The second argument is {}, it should be in range [0, 1024].",
3432
0
                        decimal_places);
3433
0
            }
3434
            // round to `decimal_places` decimal places
3435
4
            double value = MathFunctions::my_double_round(data_column->get_element(i),
3436
4
                                                          decimal_places, false, false);
3437
4
            std::string formatted_value = fmt::format("{:.{}f}", value, decimal_places);
3438
4
            if (std::isfinite(value)) {
3439
4
                result_column->insert_value(add_thousands_separator(formatted_value));
3440
4
            } else {
3441
                // if value is not finite, we just insert the original formatted value
3442
                // e.g. "inf", "-inf", "nan"
3443
0
                result_column->insert_value(formatted_value);
3444
0
            }
3445
4
        }
3446
1
        return Status::OK();
3447
1
    }
Unexecuted instantiation: _ZN5doris21FormatRoundDoubleImpl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
_ZN5doris21FormatRoundDoubleImpl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3422
1
                          size_t input_rows_count) {
3423
1
        const auto& arg_column_data_2 =
3424
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3425
1
        const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get());
3426
        // when scale is above 38, we will go here
3427
5
        for (size_t i = 0; i < input_rows_count; i++) {
3428
4
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3429
4
            if (decimal_places < 0 || decimal_places > 1024) {
3430
0
                return Status::InvalidArgument(
3431
0
                        "The second argument is {}, it should be in range [0, 1024].",
3432
0
                        decimal_places);
3433
0
            }
3434
            // round to `decimal_places` decimal places
3435
4
            double value = MathFunctions::my_double_round(data_column->get_element(i),
3436
4
                                                          decimal_places, false, false);
3437
4
            std::string formatted_value = fmt::format("{:.{}f}", value, decimal_places);
3438
4
            if (std::isfinite(value)) {
3439
4
                result_column->insert_value(add_thousands_separator(formatted_value));
3440
4
            } else {
3441
                // if value is not finite, we just insert the original formatted value
3442
                // e.g. "inf", "-inf", "nan"
3443
0
                result_column->insert_value(formatted_value);
3444
0
            }
3445
4
        }
3446
1
        return Status::OK();
3447
1
    }
3448
};
3449
3450
struct FormatRoundInt64Impl {
3451
1
    static DataTypes get_variadic_argument_types() {
3452
1
        return {std::make_shared<DataTypeInt64>(), std::make_shared<DataTypeInt32>()};
3453
1
    }
3454
3455
    template <bool is_const>
3456
    static Status execute(FunctionContext* context, ColumnString* result_column,
3457
                          const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr,
3458
1
                          size_t input_rows_count) {
3459
1
        const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get());
3460
1
        const auto& arg_column_data_2 =
3461
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3462
9
        for (size_t i = 0; i < input_rows_count; i++) {
3463
8
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3464
8
            if (decimal_places < 0 || decimal_places > 1024) {
3465
0
                return Status::InvalidArgument(
3466
0
                        "The second argument is {}, it should be in range [0, 1024].",
3467
0
                        decimal_places);
3468
0
            }
3469
8
            Int64 value = data_column->get_element(i);
3470
8
            StringRef str =
3471
8
                    FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_INT64()>(
3472
8
                            context, 0, value, 0, decimal_places);
3473
8
            result_column->insert_data(str.data, str.size);
3474
8
        }
3475
1
        return Status::OK();
3476
1
    }
Unexecuted instantiation: _ZN5doris20FormatRoundInt64Impl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
_ZN5doris20FormatRoundInt64Impl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3458
1
                          size_t input_rows_count) {
3459
1
        const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get());
3460
1
        const auto& arg_column_data_2 =
3461
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3462
9
        for (size_t i = 0; i < input_rows_count; i++) {
3463
8
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3464
8
            if (decimal_places < 0 || decimal_places > 1024) {
3465
0
                return Status::InvalidArgument(
3466
0
                        "The second argument is {}, it should be in range [0, 1024].",
3467
0
                        decimal_places);
3468
0
            }
3469
8
            Int64 value = data_column->get_element(i);
3470
8
            StringRef str =
3471
8
                    FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_INT64()>(
3472
8
                            context, 0, value, 0, decimal_places);
3473
8
            result_column->insert_data(str.data, str.size);
3474
8
        }
3475
1
        return Status::OK();
3476
1
    }
3477
};
3478
3479
struct FormatRoundInt128Impl {
3480
1
    static DataTypes get_variadic_argument_types() {
3481
1
        return {std::make_shared<DataTypeInt128>(), std::make_shared<DataTypeInt32>()};
3482
1
    }
3483
3484
    template <bool is_const>
3485
    static Status execute(FunctionContext* context, ColumnString* result_column,
3486
                          const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr,
3487
1
                          size_t input_rows_count) {
3488
1
        const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get());
3489
1
        const auto& arg_column_data_2 =
3490
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3491
        // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will
3492
        // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris,
3493
        // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124
3494
10
        for (size_t i = 0; i < input_rows_count; i++) {
3495
9
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3496
9
            if (decimal_places < 0 || decimal_places > 1024) {
3497
0
                return Status::InvalidArgument(
3498
0
                        "The second argument is {}, it should be in range [0, 1024].",
3499
0
                        decimal_places);
3500
0
            }
3501
9
            Int128 value = data_column->get_element(i);
3502
9
            StringRef str =
3503
9
                    FormatRound::do_format_round<Int128, FormatRound::MAX_FORMAT_LEN_INT128()>(
3504
9
                            context, 0, value, 0, decimal_places);
3505
9
            result_column->insert_data(str.data, str.size);
3506
9
        }
3507
1
        return Status::OK();
3508
1
    }
Unexecuted instantiation: _ZN5doris21FormatRoundInt128Impl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
_ZN5doris21FormatRoundInt128Impl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3487
1
                          size_t input_rows_count) {
3488
1
        const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get());
3489
1
        const auto& arg_column_data_2 =
3490
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3491
        // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will
3492
        // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris,
3493
        // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124
3494
10
        for (size_t i = 0; i < input_rows_count; i++) {
3495
9
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3496
9
            if (decimal_places < 0 || decimal_places > 1024) {
3497
0
                return Status::InvalidArgument(
3498
0
                        "The second argument is {}, it should be in range [0, 1024].",
3499
0
                        decimal_places);
3500
0
            }
3501
9
            Int128 value = data_column->get_element(i);
3502
9
            StringRef str =
3503
9
                    FormatRound::do_format_round<Int128, FormatRound::MAX_FORMAT_LEN_INT128()>(
3504
9
                            context, 0, value, 0, decimal_places);
3505
9
            result_column->insert_data(str.data, str.size);
3506
9
        }
3507
1
        return Status::OK();
3508
1
    }
3509
};
3510
3511
template <PrimitiveType Type>
3512
struct FormatRoundDecimalImpl {
3513
5
    static DataTypes get_variadic_argument_types() {
3514
5
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3515
5
                std::make_shared<DataTypeInt32>()};
3516
5
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE27get_variadic_argument_typesEv
Line
Count
Source
3513
1
    static DataTypes get_variadic_argument_types() {
3514
1
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3515
1
                std::make_shared<DataTypeInt32>()};
3516
1
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE27get_variadic_argument_typesEv
Line
Count
Source
3513
1
    static DataTypes get_variadic_argument_types() {
3514
1
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3515
1
                std::make_shared<DataTypeInt32>()};
3516
1
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE27get_variadic_argument_typesEv
Line
Count
Source
3513
1
    static DataTypes get_variadic_argument_types() {
3514
1
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3515
1
                std::make_shared<DataTypeInt32>()};
3516
1
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE27get_variadic_argument_typesEv
Line
Count
Source
3513
1
    static DataTypes get_variadic_argument_types() {
3514
1
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3515
1
                std::make_shared<DataTypeInt32>()};
3516
1
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE27get_variadic_argument_typesEv
Line
Count
Source
3513
1
    static DataTypes get_variadic_argument_types() {
3514
1
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3515
1
                std::make_shared<DataTypeInt32>()};
3516
1
    }
3517
3518
    template <bool is_const>
3519
    static Status execute(FunctionContext* context, ColumnString* result_column, ColumnPtr col_ptr,
3520
9
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3521
9
        const auto& arg_column_data_2 =
3522
9
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3523
9
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3524
4
            for (size_t i = 0; i < input_rows_count; i++) {
3525
3
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3526
3
                if (decimal_places < 0 || decimal_places > 1024) {
3527
0
                    return Status::InvalidArgument(
3528
0
                            "The second argument is {}, it should be in range [0, 1024].",
3529
0
                            decimal_places);
3530
0
                }
3531
3
                const auto& value = decimalv2_column->get_element(i);
3532
                // unified_frac_value has 3 digits
3533
3
                auto unified_frac_value = value.frac_value() / 1000000;
3534
3
                StringRef str =
3535
3
                        FormatRound::do_format_round<Int128,
3536
3
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3537
3
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3538
3539
3
                result_column->insert_data(str.data, str.size);
3540
3
            }
3541
8
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3542
0
            const UInt32 scale = decimal32_column->get_scale();
3543
0
            for (size_t i = 0; i < input_rows_count; i++) {
3544
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3545
0
                if (decimal_places < 0 || decimal_places > 1024) {
3546
0
                    return Status::InvalidArgument(
3547
0
                            "The second argument is {}, it should be in range [0, 1024].",
3548
0
                            decimal_places);
3549
0
                }
3550
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3551
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3552
0
                StringRef str =
3553
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3554
0
                                context, scale, static_cast<Int64>(whole_part),
3555
0
                                static_cast<Int64>(frac_part), decimal_places);
3556
3557
0
                result_column->insert_data(str.data, str.size);
3558
0
            }
3559
8
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3560
8
            const UInt32 scale = decimal64_column->get_scale();
3561
19
            for (size_t i = 0; i < input_rows_count; i++) {
3562
11
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3563
11
                if (decimal_places < 0 || decimal_places > 1024) {
3564
0
                    return Status::InvalidArgument(
3565
0
                            "The second argument is {}, it should be in range [0, 1024].",
3566
0
                            decimal_places);
3567
0
                }
3568
11
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3569
11
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3570
3571
11
                StringRef str =
3572
11
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3573
11
                                context, scale, whole_part, frac_part, decimal_places);
3574
3575
11
                result_column->insert_data(str.data, str.size);
3576
11
            }
3577
8
        } else if (const auto* decimal128_column =
3578
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3579
0
            const UInt32 scale = decimal128_column->get_scale();
3580
0
            for (size_t i = 0; i < input_rows_count; i++) {
3581
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3582
0
                if (decimal_places < 0 || decimal_places > 1024) {
3583
0
                    return Status::InvalidArgument(
3584
0
                            "The second argument is {}, it should be in range [0, 1024].",
3585
0
                            decimal_places);
3586
0
                }
3587
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3588
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3589
3590
0
                StringRef str =
3591
0
                        FormatRound::do_format_round<Int128,
3592
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3593
0
                                context, scale, whole_part, frac_part, decimal_places);
3594
3595
0
                result_column->insert_data(str.data, str.size);
3596
0
            }
3597
0
        } else {
3598
0
            return Status::InternalError("Not supported input argument type {}",
3599
0
                                         col_ptr->get_name());
3600
0
        }
3601
9
        return Status::OK();
3602
9
    }
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3520
1
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3521
1
        const auto& arg_column_data_2 =
3522
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3523
1
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3524
4
            for (size_t i = 0; i < input_rows_count; i++) {
3525
3
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3526
3
                if (decimal_places < 0 || decimal_places > 1024) {
3527
0
                    return Status::InvalidArgument(
3528
0
                            "The second argument is {}, it should be in range [0, 1024].",
3529
0
                            decimal_places);
3530
0
                }
3531
3
                const auto& value = decimalv2_column->get_element(i);
3532
                // unified_frac_value has 3 digits
3533
3
                auto unified_frac_value = value.frac_value() / 1000000;
3534
3
                StringRef str =
3535
3
                        FormatRound::do_format_round<Int128,
3536
3
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3537
3
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3538
3539
3
                result_column->insert_data(str.data, str.size);
3540
3
            }
3541
1
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3542
0
            const UInt32 scale = decimal32_column->get_scale();
3543
0
            for (size_t i = 0; i < input_rows_count; i++) {
3544
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3545
0
                if (decimal_places < 0 || decimal_places > 1024) {
3546
0
                    return Status::InvalidArgument(
3547
0
                            "The second argument is {}, it should be in range [0, 1024].",
3548
0
                            decimal_places);
3549
0
                }
3550
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3551
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3552
0
                StringRef str =
3553
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3554
0
                                context, scale, static_cast<Int64>(whole_part),
3555
0
                                static_cast<Int64>(frac_part), decimal_places);
3556
3557
0
                result_column->insert_data(str.data, str.size);
3558
0
            }
3559
0
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3560
0
            const UInt32 scale = decimal64_column->get_scale();
3561
0
            for (size_t i = 0; i < input_rows_count; i++) {
3562
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3563
0
                if (decimal_places < 0 || decimal_places > 1024) {
3564
0
                    return Status::InvalidArgument(
3565
0
                            "The second argument is {}, it should be in range [0, 1024].",
3566
0
                            decimal_places);
3567
0
                }
3568
0
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3569
0
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3570
3571
0
                StringRef str =
3572
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3573
0
                                context, scale, whole_part, frac_part, decimal_places);
3574
3575
0
                result_column->insert_data(str.data, str.size);
3576
0
            }
3577
0
        } else if (const auto* decimal128_column =
3578
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3579
0
            const UInt32 scale = decimal128_column->get_scale();
3580
0
            for (size_t i = 0; i < input_rows_count; i++) {
3581
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3582
0
                if (decimal_places < 0 || decimal_places > 1024) {
3583
0
                    return Status::InvalidArgument(
3584
0
                            "The second argument is {}, it should be in range [0, 1024].",
3585
0
                            decimal_places);
3586
0
                }
3587
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3588
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3589
3590
0
                StringRef str =
3591
0
                        FormatRound::do_format_round<Int128,
3592
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3593
0
                                context, scale, whole_part, frac_part, decimal_places);
3594
3595
0
                result_column->insert_data(str.data, str.size);
3596
0
            }
3597
0
        } else {
3598
0
            return Status::InternalError("Not supported input argument type {}",
3599
0
                                         col_ptr->get_name());
3600
0
        }
3601
1
        return Status::OK();
3602
1
    }
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3520
2
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3521
2
        const auto& arg_column_data_2 =
3522
2
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3523
2
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3524
0
            for (size_t i = 0; i < input_rows_count; i++) {
3525
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3526
0
                if (decimal_places < 0 || decimal_places > 1024) {
3527
0
                    return Status::InvalidArgument(
3528
0
                            "The second argument is {}, it should be in range [0, 1024].",
3529
0
                            decimal_places);
3530
0
                }
3531
0
                const auto& value = decimalv2_column->get_element(i);
3532
                // unified_frac_value has 3 digits
3533
0
                auto unified_frac_value = value.frac_value() / 1000000;
3534
0
                StringRef str =
3535
0
                        FormatRound::do_format_round<Int128,
3536
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3537
0
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3538
3539
0
                result_column->insert_data(str.data, str.size);
3540
0
            }
3541
2
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3542
0
            const UInt32 scale = decimal32_column->get_scale();
3543
0
            for (size_t i = 0; i < input_rows_count; i++) {
3544
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3545
0
                if (decimal_places < 0 || decimal_places > 1024) {
3546
0
                    return Status::InvalidArgument(
3547
0
                            "The second argument is {}, it should be in range [0, 1024].",
3548
0
                            decimal_places);
3549
0
                }
3550
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3551
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3552
0
                StringRef str =
3553
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3554
0
                                context, scale, static_cast<Int64>(whole_part),
3555
0
                                static_cast<Int64>(frac_part), decimal_places);
3556
3557
0
                result_column->insert_data(str.data, str.size);
3558
0
            }
3559
2
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3560
2
            const UInt32 scale = decimal64_column->get_scale();
3561
4
            for (size_t i = 0; i < input_rows_count; i++) {
3562
2
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3563
2
                if (decimal_places < 0 || decimal_places > 1024) {
3564
0
                    return Status::InvalidArgument(
3565
0
                            "The second argument is {}, it should be in range [0, 1024].",
3566
0
                            decimal_places);
3567
0
                }
3568
2
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3569
2
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3570
3571
2
                StringRef str =
3572
2
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3573
2
                                context, scale, whole_part, frac_part, decimal_places);
3574
3575
2
                result_column->insert_data(str.data, str.size);
3576
2
            }
3577
2
        } else if (const auto* decimal128_column =
3578
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3579
0
            const UInt32 scale = decimal128_column->get_scale();
3580
0
            for (size_t i = 0; i < input_rows_count; i++) {
3581
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3582
0
                if (decimal_places < 0 || decimal_places > 1024) {
3583
0
                    return Status::InvalidArgument(
3584
0
                            "The second argument is {}, it should be in range [0, 1024].",
3585
0
                            decimal_places);
3586
0
                }
3587
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3588
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3589
3590
0
                StringRef str =
3591
0
                        FormatRound::do_format_round<Int128,
3592
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3593
0
                                context, scale, whole_part, frac_part, decimal_places);
3594
3595
0
                result_column->insert_data(str.data, str.size);
3596
0
            }
3597
0
        } else {
3598
0
            return Status::InternalError("Not supported input argument type {}",
3599
0
                                         col_ptr->get_name());
3600
0
        }
3601
2
        return Status::OK();
3602
2
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3520
6
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3521
6
        const auto& arg_column_data_2 =
3522
6
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3523
6
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3524
0
            for (size_t i = 0; i < input_rows_count; i++) {
3525
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3526
0
                if (decimal_places < 0 || decimal_places > 1024) {
3527
0
                    return Status::InvalidArgument(
3528
0
                            "The second argument is {}, it should be in range [0, 1024].",
3529
0
                            decimal_places);
3530
0
                }
3531
0
                const auto& value = decimalv2_column->get_element(i);
3532
                // unified_frac_value has 3 digits
3533
0
                auto unified_frac_value = value.frac_value() / 1000000;
3534
0
                StringRef str =
3535
0
                        FormatRound::do_format_round<Int128,
3536
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3537
0
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3538
3539
0
                result_column->insert_data(str.data, str.size);
3540
0
            }
3541
6
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3542
0
            const UInt32 scale = decimal32_column->get_scale();
3543
0
            for (size_t i = 0; i < input_rows_count; i++) {
3544
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3545
0
                if (decimal_places < 0 || decimal_places > 1024) {
3546
0
                    return Status::InvalidArgument(
3547
0
                            "The second argument is {}, it should be in range [0, 1024].",
3548
0
                            decimal_places);
3549
0
                }
3550
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3551
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3552
0
                StringRef str =
3553
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3554
0
                                context, scale, static_cast<Int64>(whole_part),
3555
0
                                static_cast<Int64>(frac_part), decimal_places);
3556
3557
0
                result_column->insert_data(str.data, str.size);
3558
0
            }
3559
6
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3560
6
            const UInt32 scale = decimal64_column->get_scale();
3561
15
            for (size_t i = 0; i < input_rows_count; i++) {
3562
9
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3563
9
                if (decimal_places < 0 || decimal_places > 1024) {
3564
0
                    return Status::InvalidArgument(
3565
0
                            "The second argument is {}, it should be in range [0, 1024].",
3566
0
                            decimal_places);
3567
0
                }
3568
9
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3569
9
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3570
3571
9
                StringRef str =
3572
9
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3573
9
                                context, scale, whole_part, frac_part, decimal_places);
3574
3575
9
                result_column->insert_data(str.data, str.size);
3576
9
            }
3577
6
        } else if (const auto* decimal128_column =
3578
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3579
0
            const UInt32 scale = decimal128_column->get_scale();
3580
0
            for (size_t i = 0; i < input_rows_count; i++) {
3581
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3582
0
                if (decimal_places < 0 || decimal_places > 1024) {
3583
0
                    return Status::InvalidArgument(
3584
0
                            "The second argument is {}, it should be in range [0, 1024].",
3585
0
                            decimal_places);
3586
0
                }
3587
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3588
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3589
3590
0
                StringRef str =
3591
0
                        FormatRound::do_format_round<Int128,
3592
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3593
0
                                context, scale, whole_part, frac_part, decimal_places);
3594
3595
0
                result_column->insert_data(str.data, str.size);
3596
0
            }
3597
0
        } else {
3598
0
            return Status::InternalError("Not supported input argument type {}",
3599
0
                                         col_ptr->get_name());
3600
0
        }
3601
6
        return Status::OK();
3602
6
    }
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
3603
};
3604
3605
class FunctionStringLocatePos : public IFunction {
3606
public:
3607
    static constexpr auto name = "locate";
3608
816
    // Factory: returns a freshly allocated, shared FunctionStringLocatePos.
    static FunctionPtr create() {
        return std::make_shared<FunctionStringLocatePos>();
    }
3609
0
    // Returns the function name held in the class-level `name` constant ("locate").
    String get_name() const override {
        return name;
    }
3610
0
    // This overload always takes exactly three arguments.
    size_t get_number_of_arguments() const override {
        return 3;
    }
3611
3612
814
    // The result type is always Int32, regardless of the argument types passed in.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        DataTypePtr int32_type = std::make_shared<DataTypeInt32>();
        return int32_type;
    }
3615
3616
1
    // Declares the argument signature of this variadic overload:
    // (String, String, Int32), in that order.
    DataTypes get_variadic_argument_types_impl() const override {
        const DataTypePtr first_string_type = std::make_shared<DataTypeString>();
        const DataTypePtr second_string_type = std::make_shared<DataTypeString>();
        const DataTypePtr position_type = std::make_shared<DataTypeInt32>();
        return {first_string_type, second_string_type, position_type};
    }
3620
3621
815
    // Marks this function as variadic; its concrete signature is the one reported
    // by get_variadic_argument_types_impl().
    bool is_variadic() const override {
        return true;
    }
3622
3623
    // Evaluates the three-argument `locate` for a block.
    //
    // Reads the three argument columns referenced by `arguments` (String, String,
    // Int32), computes one Int32 per row and stores the resulting column at position
    // `result` of `block`.
    //
    // Returns InvalidArgument when the call does not carry exactly three arguments,
    // OK otherwise.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        if (arguments.size() != 3) {
            return Status::InvalidArgument("Function {} requires 3 arguments, but got {}",
                                           get_name(), arguments.size());
        }

        // Strip any constant-column wrapper and remember, per argument, whether the
        // argument was constant.
        ColumnPtr argument_columns[3];
        bool col_const[3];
        for (size_t arg = 0; arg < 3; ++arg) {
            std::tie(argument_columns[arg], col_const[arg]) =
                    unpack_if_const(block.get_by_position(arguments[arg]).column);
        }

        const auto* col_left = assert_cast<const ColumnString*>(argument_columns[0].get());
        const auto* col_right = assert_cast<const ColumnString*>(argument_columns[1].get());
        const auto* col_pos = assert_cast<const ColumnInt32*>(argument_columns[2].get());

        ColumnInt32::MutablePtr col_res = ColumnInt32::create();
        auto& vec_res = col_res->get_data();
        vec_res.resize(block.rows());

        // The ASCII-specialised search is only selected when both string columns
        // report themselves as ASCII.
        const bool both_ascii = col_left->is_ascii() && col_right->is_ascii();
        const bool left_is_const = col_const[0];

        // Turn the three runtime flags into compile-time template arguments, then
        // pick the scalar path (constant first argument, pattern taken from row 0)
        // or the vector path (pattern read per row).
        std::visit(
                [&](auto ascii_tag, auto str_tag, auto pos_tag) {
                    if (left_is_const) {
                        scalar_search<ascii_tag, str_tag, pos_tag>(
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
                                input_rows_count);
                    } else {
                        vector_search<ascii_tag, str_tag, pos_tag>(col_left, col_right,
                                                                   col_pos->get_data(), vec_res,
                                                                   input_rows_count);
                    }
                },
                make_bool_variant(both_ascii), make_bool_variant(col_const[1]),
                make_bool_variant(col_const[2]));

        block.replace_by_position(result, std::move(col_res));
        return Status::OK();
    }
3669
3670
private:
3671
    template <bool is_ascii, bool str_const, bool pos_const>
3672
    void scalar_search(const StringRef& ldata, const ColumnString* col_right,
3673
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
3674
246
                       size_t size) const {
3675
246
        res.resize(size);
3676
246
        StringRef substr(ldata.data, ldata.size);
3677
246
        StringSearch search {&substr};
3678
3679
492
        for (int i = 0; i < size; ++i) {
3680
246
            res[i] = locate_pos<is_ascii>(substr,
3681
246
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3682
246
                                          search, posdata[index_check_const<pos_const>(i)]);
3683
246
        }
3684
246
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSD_m
Line
Count
Source
3674
22
                       size_t size) const {
3675
22
        res.resize(size);
3676
22
        StringRef substr(ldata.data, ldata.size);
3677
22
        StringSearch search {&substr};
3678
3679
44
        for (int i = 0; i < size; ++i) {
3680
22
            res[i] = locate_pos<is_ascii>(substr,
3681
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3682
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3683
22
        }
3684
22
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSD_m
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSD_m
Line
Count
Source
3674
60
                       size_t size) const {
3675
60
        res.resize(size);
3676
60
        StringRef substr(ldata.data, ldata.size);
3677
60
        StringSearch search {&substr};
3678
3679
120
        for (int i = 0; i < size; ++i) {
3680
60
            res[i] = locate_pos<is_ascii>(substr,
3681
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3682
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3683
60
        }
3684
60
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSD_m
Line
Count
Source
3674
60
                       size_t size) const {
3675
60
        res.resize(size);
3676
60
        StringRef substr(ldata.data, ldata.size);
3677
60
        StringSearch search {&substr};
3678
3679
120
        for (int i = 0; i < size; ++i) {
3680
60
            res[i] = locate_pos<is_ascii>(substr,
3681
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3682
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3683
60
        }
3684
60
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSD_m
Line
Count
Source
3674
60
                       size_t size) const {
3675
60
        res.resize(size);
3676
60
        StringRef substr(ldata.data, ldata.size);
3677
60
        StringSearch search {&substr};
3678
3679
120
        for (int i = 0; i < size; ++i) {
3680
60
            res[i] = locate_pos<is_ascii>(substr,
3681
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3682
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3683
60
        }
3684
60
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSD_m
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSD_m
Line
Count
Source
3674
22
                       size_t size) const {
3675
22
        res.resize(size);
3676
22
        StringRef substr(ldata.data, ldata.size);
3677
22
        StringSearch search {&substr};
3678
3679
44
        for (int i = 0; i < size; ++i) {
3680
22
            res[i] = locate_pos<is_ascii>(substr,
3681
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3682
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3683
22
        }
3684
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSD_m
Line
Count
Source
3674
22
                       size_t size) const {
3675
22
        res.resize(size);
3676
22
        StringRef substr(ldata.data, ldata.size);
3677
22
        StringSearch search {&substr};
3678
3679
44
        for (int i = 0; i < size; ++i) {
3680
22
            res[i] = locate_pos<is_ascii>(substr,
3681
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3682
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3683
22
        }
3684
22
    }
3685
3686
    template <bool is_ascii, bool str_const, bool pos_const>
3687
    void vector_search(const ColumnString* col_left, const ColumnString* col_right,
3688
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
3689
330
                       size_t size) const {
3690
330
        res.resize(size);
3691
330
        StringSearch search;
3692
774
        for (int i = 0; i < size; ++i) {
3693
444
            StringRef substr = col_left->get_data_at(i);
3694
444
            search.set_pattern(&substr);
3695
444
            res[i] = locate_pos<is_ascii>(substr,
3696
444
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3697
444
                                          search, posdata[index_check_const<pos_const>(i)]);
3698
444
        }
3699
330
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSA_m
Line
Count
Source
3689
23
                       size_t size) const {
3690
23
        res.resize(size);
3691
23
        StringSearch search;
3692
71
        for (int i = 0; i < size; ++i) {
3693
48
            StringRef substr = col_left->get_data_at(i);
3694
48
            search.set_pattern(&substr);
3695
48
            res[i] = locate_pos<is_ascii>(substr,
3696
48
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3697
48
                                          search, posdata[index_check_const<pos_const>(i)]);
3698
48
        }
3699
23
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSA_m
Line
Count
Source
3689
60
                       size_t size) const {
3690
60
        res.resize(size);
3691
60
        StringSearch search;
3692
120
        for (int i = 0; i < size; ++i) {
3693
60
            StringRef substr = col_left->get_data_at(i);
3694
60
            search.set_pattern(&substr);
3695
60
            res[i] = locate_pos<is_ascii>(substr,
3696
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3697
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3698
60
        }
3699
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSA_m
Line
Count
Source
3689
60
                       size_t size) const {
3690
60
        res.resize(size);
3691
60
        StringSearch search;
3692
120
        for (int i = 0; i < size; ++i) {
3693
60
            StringRef substr = col_left->get_data_at(i);
3694
60
            search.set_pattern(&substr);
3695
60
            res[i] = locate_pos<is_ascii>(substr,
3696
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3697
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3698
60
        }
3699
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSA_m
Line
Count
Source
3689
60
                       size_t size) const {
3690
60
        res.resize(size);
3691
60
        StringSearch search;
3692
120
        for (int i = 0; i < size; ++i) {
3693
60
            StringRef substr = col_left->get_data_at(i);
3694
60
            search.set_pattern(&substr);
3695
60
            res[i] = locate_pos<is_ascii>(substr,
3696
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3697
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3698
60
        }
3699
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSA_m
Line
Count
Source
3689
61
                       size_t size) const {
3690
61
        res.resize(size);
3691
61
        StringSearch search;
3692
211
        for (int i = 0; i < size; ++i) {
3693
150
            StringRef substr = col_left->get_data_at(i);
3694
150
            search.set_pattern(&substr);
3695
150
            res[i] = locate_pos<is_ascii>(substr,
3696
150
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3697
150
                                          search, posdata[index_check_const<pos_const>(i)]);
3698
150
        }
3699
61
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSA_m
Line
Count
Source
3689
22
                       size_t size) const {
3690
22
        res.resize(size);
3691
22
        StringSearch search;
3692
44
        for (int i = 0; i < size; ++i) {
3693
22
            StringRef substr = col_left->get_data_at(i);
3694
22
            search.set_pattern(&substr);
3695
22
            res[i] = locate_pos<is_ascii>(substr,
3696
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3697
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3698
22
        }
3699
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSA_m
Line
Count
Source
3689
22
                       size_t size) const {
3690
22
        res.resize(size);
3691
22
        StringSearch search;
3692
44
        for (int i = 0; i < size; ++i) {
3693
22
            StringRef substr = col_left->get_data_at(i);
3694
22
            search.set_pattern(&substr);
3695
22
            res[i] = locate_pos<is_ascii>(substr,
3696
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3697
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3698
22
        }
3699
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEERSA_m
Line
Count
Source
3689
22
                       size_t size) const {
3690
22
        res.resize(size);
3691
22
        StringSearch search;
3692
44
        for (int i = 0; i < size; ++i) {
3693
22
            StringRef substr = col_left->get_data_at(i);
3694
22
            search.set_pattern(&substr);
3695
22
            res[i] = locate_pos<is_ascii>(substr,
3696
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3697
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3698
22
        }
3699
22
    }
3700
3701
    template <bool is_ascii>
3702
690
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
3703
690
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
3704
            // BEHAVIOR COMPATIBLE WITH MYSQL
3705
            // locate('','')  locate('','',1) locate('','',2)
3706
            // 1  1 0
3707
11
            return 1;
3708
11
        }
3709
679
        if (is_ascii) {
3710
499
            return locate_pos_ascii(substr, str, search, start_pos);
3711
499
        } else {
3712
180
            return locate_pos_utf8(substr, str, search, start_pos);
3713
180
        }
3714
679
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb0EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
3702
180
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
3703
180
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
3704
            // BEHAVIOR COMPATIBLE WITH MYSQL
3705
            // locate('','')  locate('','',1) locate('','',2)
3706
            // 1  1 0
3707
0
            return 1;
3708
0
        }
3709
180
        if (is_ascii) {
3710
0
            return locate_pos_ascii(substr, str, search, start_pos);
3711
180
        } else {
3712
180
            return locate_pos_utf8(substr, str, search, start_pos);
3713
180
        }
3714
180
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb1EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
3702
510
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
3703
510
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
3704
            // BEHAVIOR COMPATIBLE WITH MYSQL
3705
            // locate('','')  locate('','',1) locate('','',2)
3706
            // 1  1 0
3707
11
            return 1;
3708
11
        }
3709
499
        if (is_ascii) {
3710
499
            return locate_pos_ascii(substr, str, search, start_pos);
3711
499
        } else {
3712
0
            return locate_pos_utf8(substr, str, search, start_pos);
3713
0
        }
3714
499
    }
3715
3716
    int locate_pos_utf8(StringRef substr, StringRef str, StringSearch& search,
3717
180
                        int start_pos) const {
3718
180
        std::vector<size_t> index;
3719
180
        size_t char_len = simd::VStringFunctions::get_char_len(str.data, str.size, index);
3720
180
        if (start_pos <= 0 || start_pos > char_len) {
3721
43
            return 0;
3722
43
        }
3723
137
        if (substr.size == 0) {
3724
17
            return start_pos;
3725
17
        }
3726
        // Input start_pos starts from 1.
3727
120
        StringRef adjusted_str(str.data + index[start_pos - 1], str.size - index[start_pos - 1]);
3728
120
        int32_t match_pos = search.search(&adjusted_str);
3729
120
        if (match_pos >= 0) {
3730
            // Hive returns the position in the original string starting from 1.
3731
104
            return start_pos + simd::VStringFunctions::get_char_len(adjusted_str.data, match_pos);
3732
104
        } else {
3733
16
            return 0;
3734
16
        }
3735
120
    }
3736
3737
    int locate_pos_ascii(StringRef substr, StringRef str, StringSearch& search,
3738
499
                         int start_pos) const {
3739
499
        if (start_pos <= 0 || start_pos > str.size) {
3740
367
            return 0;
3741
367
        }
3742
132
        if (substr.size == 0) {
3743
36
            return start_pos;
3744
36
        }
3745
        // Input start_pos starts from 1.
3746
96
        StringRef adjusted_str(str.data + start_pos - 1, str.size - start_pos + 1);
3747
96
        int32_t match_pos = search.search(&adjusted_str);
3748
96
        if (match_pos >= 0) {
3749
            // Hive returns the position in the original string starting from 1.
3750
40
            return start_pos + match_pos;
3751
56
        } else {
3752
56
            return 0;
3753
56
        }
3754
96
    }
3755
};
3756
3757
struct ReplaceImpl {
3758
    static constexpr auto name = "replace";
3759
};
3760
3761
struct ReplaceEmptyImpl {
3762
    static constexpr auto name = "replace_empty";
3763
};
3764
3765
template <typename Impl, bool empty>
3766
class FunctionReplace : public IFunction {
3767
public:
3768
    static constexpr auto name = Impl::name;
3769
3.03k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
_ZN5doris15FunctionReplaceINS_11ReplaceImplELb1EE6createEv
Line
Count
Source
3769
1.51k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
_ZN5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE6createEv
Line
Count
Source
3769
1.51k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
3770
2
    String get_name() const override { return name; }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE8get_nameB5cxx11Ev
Line
Count
Source
3770
1
    String get_name() const override { return name; }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE8get_nameB5cxx11Ev
Line
Count
Source
3770
1
    String get_name() const override { return name; }
3771
3.02k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE23get_number_of_argumentsEv
Line
Count
Source
3771
1.51k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE23get_number_of_argumentsEv
Line
Count
Source
3771
1.51k
    size_t get_number_of_arguments() const override { return 3; }
3772
3773
3.02k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3774
3.02k
        return std::make_shared<DataTypeString>();
3775
3.02k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
3773
1.51k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3774
1.51k
        return std::make_shared<DataTypeString>();
3775
1.51k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
3773
1.51k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3774
1.51k
        return std::make_shared<DataTypeString>();
3775
1.51k
    }
3776
3777
2
    DataTypes get_variadic_argument_types_impl() const override {
3778
2
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3779
2
                std::make_shared<DataTypeString>()};
3780
2
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE32get_variadic_argument_types_implEv
Line
Count
Source
3777
1
    DataTypes get_variadic_argument_types_impl() const override {
3778
1
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3779
1
                std::make_shared<DataTypeString>()};
3780
1
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE32get_variadic_argument_types_implEv
Line
Count
Source
3777
1
    DataTypes get_variadic_argument_types_impl() const override {
3778
1
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3779
1
                std::make_shared<DataTypeString>()};
3780
1
    }
3781
3782
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3783
1.75k
                        uint32_t result, size_t input_rows_count) const override {
3784
        // We need a local variable to hold a reference to the converted column.
3785
        // So that the converted column will not be released before we use it.
3786
1.75k
        ColumnPtr col[3];
3787
1.75k
        bool col_const[3];
3788
7.00k
        for (size_t i = 0; i < 3; ++i) {
3789
5.25k
            std::tie(col[i], col_const[i]) =
3790
5.25k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3791
5.25k
        }
3792
3793
1.75k
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
3794
1.75k
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
3795
1.75k
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
3796
3797
1.75k
        ColumnString::MutablePtr col_res = ColumnString::create();
3798
3799
1.75k
        std::visit(
3800
1.75k
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
3.93k
                    for (int i = 0; i < input_rows_count; ++i) {
3802
2.18k
                        StringRef origin_str =
3803
2.18k
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
2.18k
                        StringRef old_str =
3805
2.18k
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
2.18k
                        StringRef new_str =
3807
2.18k
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
2.18k
                        std::string result =
3810
2.18k
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
2.18k
                                        new_str.to_string_view());
3812
3813
2.18k
                        col_res->insert_data(result.data(), result.length());
3814
2.18k
                    }
3815
1.75k
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
3800
126
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
467
                    for (int i = 0; i < input_rows_count; ++i) {
3802
341
                        StringRef origin_str =
3803
341
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
341
                        StringRef old_str =
3805
341
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
341
                        StringRef new_str =
3807
341
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
341
                        std::string result =
3810
341
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
341
                                        new_str.to_string_view());
3812
3813
341
                        col_res->insert_data(result.data(), result.length());
3814
341
                    }
3815
126
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
3800
126
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
467
                    for (int i = 0; i < input_rows_count; ++i) {
3802
341
                        StringRef origin_str =
3803
341
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
341
                        StringRef old_str =
3805
341
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
341
                        StringRef new_str =
3807
341
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
341
                        std::string result =
3810
341
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
341
                                        new_str.to_string_view());
3812
3813
341
                        col_res->insert_data(result.data(), result.length());
3814
341
                    }
3815
126
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
3800
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
250
                    for (int i = 0; i < input_rows_count; ++i) {
3802
125
                        StringRef origin_str =
3803
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
125
                        StringRef old_str =
3805
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
125
                        StringRef new_str =
3807
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
125
                        std::string result =
3810
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
125
                                        new_str.to_string_view());
3812
3813
125
                        col_res->insert_data(result.data(), result.length());
3814
125
                    }
3815
125
                },
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
3816
1.75k
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3817
1.75k
                make_bool_variant(col_const[2]));
3818
3819
1.75k
        block.replace_by_position(result, std::move(col_res));
3820
1.75k
        return Status::OK();
3821
1.75k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
3783
876
                        uint32_t result, size_t input_rows_count) const override {
3784
        // We need a local variable to hold a reference to the converted column.
3785
        // So that the converted column will not be released before we use it.
3786
876
        ColumnPtr col[3];
3787
876
        bool col_const[3];
3788
3.50k
        for (size_t i = 0; i < 3; ++i) {
3789
2.62k
            std::tie(col[i], col_const[i]) =
3790
2.62k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3791
2.62k
        }
3792
3793
876
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
3794
876
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
3795
876
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
3796
3797
876
        ColumnString::MutablePtr col_res = ColumnString::create();
3798
3799
876
        std::visit(
3800
876
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
876
                    for (int i = 0; i < input_rows_count; ++i) {
3802
876
                        StringRef origin_str =
3803
876
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
876
                        StringRef old_str =
3805
876
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
876
                        StringRef new_str =
3807
876
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
876
                        std::string result =
3810
876
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
876
                                        new_str.to_string_view());
3812
3813
876
                        col_res->insert_data(result.data(), result.length());
3814
876
                    }
3815
876
                },
3816
876
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3817
876
                make_bool_variant(col_const[2]));
3818
3819
876
        block.replace_by_position(result, std::move(col_res));
3820
876
        return Status::OK();
3821
876
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
3783
876
                        uint32_t result, size_t input_rows_count) const override {
3784
        // We need a local variable to hold a reference to the converted column.
3785
        // So that the converted column will not be released before we use it.
3786
876
        ColumnPtr col[3];
3787
876
        bool col_const[3];
3788
3.50k
        for (size_t i = 0; i < 3; ++i) {
3789
2.62k
            std::tie(col[i], col_const[i]) =
3790
2.62k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3791
2.62k
        }
3792
3793
876
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
3794
876
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
3795
876
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
3796
3797
876
        ColumnString::MutablePtr col_res = ColumnString::create();
3798
3799
876
        std::visit(
3800
876
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3801
876
                    for (int i = 0; i < input_rows_count; ++i) {
3802
876
                        StringRef origin_str =
3803
876
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3804
876
                        StringRef old_str =
3805
876
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3806
876
                        StringRef new_str =
3807
876
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3808
3809
876
                        std::string result =
3810
876
                                replace(origin_str.to_string(), old_str.to_string_view(),
3811
876
                                        new_str.to_string_view());
3812
3813
876
                        col_res->insert_data(result.data(), result.length());
3814
876
                    }
3815
876
                },
3816
876
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3817
876
                make_bool_variant(col_const[2]));
3818
3819
876
        block.replace_by_position(result, std::move(col_res));
3820
876
        return Status::OK();
3821
876
    }
3822
3823
private:
3824
2.18k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
3825
2.18k
        if (old_str.empty()) {
3826
494
            if constexpr (empty) {
3827
247
                return str;
3828
247
            } else {
3829
                // Different from "Replace" only when the search string is empty.
3830
                // it will insert `new_str` in front of every character and at the end of the old str.
3831
247
                if (new_str.empty()) {
3832
59
                    return str;
3833
59
                }
3834
188
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
3835
188
                    std::string result;
3836
188
                    ColumnString::check_chars_length(
3837
188
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
3838
188
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3839
648
                    for (char c : str) {
3840
648
                        result += new_str;
3841
648
                        result += c;
3842
648
                    }
3843
188
                    result += new_str;
3844
188
                    return result;
3845
188
                } else {
3846
0
                    std::string result;
3847
0
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3848
0
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
3849
0
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
3850
0
                        result += new_str;
3851
0
                        result.append(&str[i], utf8_char_len);
3852
0
                    }
3853
0
                    result += new_str;
3854
0
                    ColumnString::check_chars_length(result.size(), 0);
3855
0
                    return result;
3856
0
                }
3857
188
            }
3858
1.68k
        } else {
3859
1.68k
            std::string::size_type pos = 0;
3860
1.68k
            std::string::size_type oldLen = old_str.size();
3861
1.68k
            std::string::size_type newLen = new_str.size();
3862
2.34k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
3863
656
                str.replace(pos, oldLen, new_str);
3864
656
                pos += newLen;
3865
656
            }
3866
1.68k
            return str;
3867
1.68k
        }
3868
2.18k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_
Line
Count
Source
3824
1.09k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
3825
1.09k
        if (old_str.empty()) {
3826
247
            if constexpr (empty) {
3827
247
                return str;
3828
            } else {
3829
                // Different from "Replace" only when the search string is empty.
3830
                // it will insert `new_str` in front of every character and at the end of the old str.
3831
                if (new_str.empty()) {
3832
                    return str;
3833
                }
3834
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
3835
                    std::string result;
3836
                    ColumnString::check_chars_length(
3837
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
3838
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3839
                    for (char c : str) {
3840
                        result += new_str;
3841
                        result += c;
3842
                    }
3843
                    result += new_str;
3844
                    return result;
3845
                } else {
3846
                    std::string result;
3847
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3848
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
3849
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
3850
                        result += new_str;
3851
                        result.append(&str[i], utf8_char_len);
3852
                    }
3853
                    result += new_str;
3854
                    ColumnString::check_chars_length(result.size(), 0);
3855
                    return result;
3856
                }
3857
            }
3858
844
        } else {
3859
844
            std::string::size_type pos = 0;
3860
844
            std::string::size_type oldLen = old_str.size();
3861
844
            std::string::size_type newLen = new_str.size();
3862
1.17k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
3863
328
                str.replace(pos, oldLen, new_str);
3864
328
                pos += newLen;
3865
328
            }
3866
844
            return str;
3867
844
        }
3868
1.09k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_
Line
Count
Source
3824
1.09k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
3825
1.09k
        if (old_str.empty()) {
3826
            if constexpr (empty) {
3827
                return str;
3828
247
            } else {
3829
                // Different from "Replace" only when the search string is empty.
3830
                // it will insert `new_str` in front of every character and at the end of the old str.
3831
247
                if (new_str.empty()) {
3832
59
                    return str;
3833
59
                }
3834
188
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
3835
188
                    std::string result;
3836
188
                    ColumnString::check_chars_length(
3837
188
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
3838
188
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3839
648
                    for (char c : str) {
3840
648
                        result += new_str;
3841
648
                        result += c;
3842
648
                    }
3843
188
                    result += new_str;
3844
188
                    return result;
3845
188
                } else {
3846
0
                    std::string result;
3847
0
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3848
0
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
3849
0
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
3850
0
                        result += new_str;
3851
0
                        result.append(&str[i], utf8_char_len);
3852
0
                    }
3853
0
                    result += new_str;
3854
0
                    ColumnString::check_chars_length(result.size(), 0);
3855
0
                    return result;
3856
0
                }
3857
188
            }
3858
844
        } else {
3859
844
            std::string::size_type pos = 0;
3860
844
            std::string::size_type oldLen = old_str.size();
3861
844
            std::string::size_type newLen = new_str.size();
3862
1.17k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
3863
328
                str.replace(pos, oldLen, new_str);
3864
328
                pos += newLen;
3865
328
            }
3866
844
            return str;
3867
844
        }
3868
1.09k
    }
3869
};
3870
3871
struct ReverseImpl {
3872
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
3873
53
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
3874
53
        auto rows_count = offsets.size();
3875
53
        res_offsets.resize(rows_count);
3876
53
        res_data.reserve(data.size());
3877
158
        for (ssize_t i = 0; i < rows_count; ++i) {
3878
105
            auto src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
3879
105
            int64_t src_len = offsets[i] - offsets[i - 1];
3880
105
            std::string dst;
3881
105
            dst.resize(src_len);
3882
105
            simd::VStringFunctions::reverse(StringRef((uint8_t*)src_str, src_len), &dst);
3883
105
            StringOP::push_value_string(std::string_view(dst.data(), src_len), i, res_data,
3884
105
                                        res_offsets);
3885
105
        }
3886
53
        return Status::OK();
3887
53
    }
3888
};
3889
3890
template <typename Impl>
3891
class FunctionSubReplace : public IFunction {
3892
public:
3893
    static constexpr auto name = "sub_replace";
3894
3895
4
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
_ZN5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE6createEv
Line
Count
Source
3895
2
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
_ZN5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE6createEv
Line
Count
Source
3895
2
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
3896
3897
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE8get_nameB5cxx11Ev
3898
3899
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3900
0
        return make_nullable(std::make_shared<DataTypeString>());
3901
0
    }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
3902
3903
2
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE11is_variadicEv
Line
Count
Source
3903
1
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE11is_variadicEv
Line
Count
Source
3903
1
    bool is_variadic() const override { return true; }
3904
3905
2
    DataTypes get_variadic_argument_types_impl() const override {
3906
2
        return Impl::get_variadic_argument_types();
3907
2
    }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE32get_variadic_argument_types_implEv
Line
Count
Source
3905
1
    DataTypes get_variadic_argument_types_impl() const override {
3906
1
        return Impl::get_variadic_argument_types();
3907
1
    }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE32get_variadic_argument_types_implEv
Line
Count
Source
3905
1
    DataTypes get_variadic_argument_types_impl() const override {
3906
1
        return Impl::get_variadic_argument_types();
3907
1
    }
3908
3909
0
    size_t get_number_of_arguments() const override {
3910
0
        return get_variadic_argument_types_impl().size();
3911
0
    }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE23get_number_of_argumentsEv
3912
3913
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3914
0
                        uint32_t result, size_t input_rows_count) const override {
3915
0
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
3916
0
    }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
3917
};
3918
3919
struct SubReplaceImpl {
3920
    static Status replace_execute(Block& block, const ColumnNumbers& arguments, uint32_t result,
3921
1
                                  size_t input_rows_count) {
3922
1
        auto res_column = ColumnString::create();
3923
1
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
3924
1
        auto args_null_map = ColumnUInt8::create(input_rows_count, 0);
3925
1
        ColumnPtr argument_columns[4];
3926
1
        bool col_const[4];
3927
5
        for (int i = 0; i < 4; ++i) {
3928
4
            std::tie(argument_columns[i], col_const[i]) =
3929
4
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3930
4
        }
3931
1
        const auto* data_column = assert_cast<const ColumnString*>(argument_columns[0].get());
3932
1
        const auto* mask_column = assert_cast<const ColumnString*>(argument_columns[1].get());
3933
1
        const auto* start_column = assert_cast<const ColumnInt32*>(argument_columns[2].get());
3934
1
        const auto* length_column = assert_cast<const ColumnInt32*>(argument_columns[3].get());
3935
3936
1
        std::visit(
3937
1
                [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) {
3938
1
                    if (data_column->is_ascii()) {
3939
1
                        vector_ascii<origin_str_const, new_str_const, start_const, len_const>(
3940
1
                                data_column, mask_column, start_column->get_data(),
3941
1
                                length_column->get_data(), args_null_map->get_data(), result_column,
3942
1
                                input_rows_count);
3943
1
                    } else {
3944
0
                        vector_utf8<origin_str_const, new_str_const, start_const, len_const>(
3945
0
                                data_column, mask_column, start_column->get_data(),
3946
0
                                length_column->get_data(), args_null_map->get_data(), result_column,
3947
0
                                input_rows_count);
3948
0
                    }
3949
1
                },
_ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_
Line
Count
Source
3937
1
                [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) {
3938
1
                    if (data_column->is_ascii()) {
3939
1
                        vector_ascii<origin_str_const, new_str_const, start_const, len_const>(
3940
1
                                data_column, mask_column, start_column->get_data(),
3941
1
                                length_column->get_data(), args_null_map->get_data(), result_column,
3942
1
                                input_rows_count);
3943
1
                    } else {
3944
0
                        vector_utf8<origin_str_const, new_str_const, start_const, len_const>(
3945
0
                                data_column, mask_column, start_column->get_data(),
3946
0
                                length_column->get_data(), args_null_map->get_data(), result_column,
3947
0
                                input_rows_count);
3948
0
                    }
3949
1
                },
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_
3950
1
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3951
1
                make_bool_variant(col_const[2]), make_bool_variant(col_const[3]));
3952
1
        block.get_by_position(result).column =
3953
1
                ColumnNullable::create(std::move(res_column), std::move(args_null_map));
3954
1
        return Status::OK();
3955
1
    }
3956
3957
private:
3958
    template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const>
3959
    static void vector_ascii(const ColumnString* data_column, const ColumnString* mask_column,
3960
                             const PaddedPODArray<Int32>& args_start,
3961
                             const PaddedPODArray<Int32>& args_length, NullMap& args_null_map,
3962
1
                             ColumnString* result_column, size_t input_rows_count) {
3963
1
        ColumnString::Chars& res_chars = result_column->get_chars();
3964
1
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
3965
10.2k
        for (size_t row = 0; row < input_rows_count; ++row) {
3966
10.2k
            StringRef origin_str =
3967
10.2k
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
3968
10.2k
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
3969
10.2k
            const auto start = args_start[index_check_const<start_const>(row)];
3970
10.2k
            const auto length = args_length[index_check_const<len_const>(row)];
3971
10.2k
            const size_t origin_str_len = origin_str.size;
3972
            //input is null, start < 0, len < 0, str_size <= start. return NULL
3973
10.2k
            if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) {
3974
10.2k
                res_offsets.push_back(res_chars.size());
3975
10.2k
                args_null_map[row] = 1;
3976
10.2k
            } else {
3977
0
                std::string_view replace_str = new_str.to_string_view();
3978
0
                std::string result = origin_str.to_string();
3979
0
                result.replace(start, length, replace_str);
3980
0
                result_column->insert_data(result.data(), result.length());
3981
0
            }
3982
10.2k
        }
3983
1
    }
_ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Line
Count
Source
3962
1
                             ColumnString* result_column, size_t input_rows_count) {
3963
1
        ColumnString::Chars& res_chars = result_column->get_chars();
3964
1
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
3965
10.2k
        for (size_t row = 0; row < input_rows_count; ++row) {
3966
10.2k
            StringRef origin_str =
3967
10.2k
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
3968
10.2k
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
3969
10.2k
            const auto start = args_start[index_check_const<start_const>(row)];
3970
10.2k
            const auto length = args_length[index_check_const<len_const>(row)];
3971
10.2k
            const size_t origin_str_len = origin_str.size;
3972
            //input is null, start < 0, len < 0, str_size <= start. return NULL
3973
10.2k
            if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) {
3974
10.2k
                res_offsets.push_back(res_chars.size());
3975
10.2k
                args_null_map[row] = 1;
3976
10.2k
            } else {
3977
0
                std::string_view replace_str = new_str.to_string_view();
3978
0
                std::string result = origin_str.to_string();
3979
0
                result.replace(start, length, replace_str);
3980
0
                result_column->insert_data(result.data(), result.length());
3981
0
            }
3982
10.2k
        }
3983
1
    }
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
3984
3985
    template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const>
3986
    static void vector_utf8(const ColumnString* data_column, const ColumnString* mask_column,
3987
                            const PaddedPODArray<Int32>& args_start,
3988
                            const PaddedPODArray<Int32>& args_length, NullMap& args_null_map,
3989
0
                            ColumnString* result_column, size_t input_rows_count) {
3990
0
        ColumnString::Chars& res_chars = result_column->get_chars();
3991
0
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
3992
3993
0
        for (size_t row = 0; row < input_rows_count; ++row) {
3994
0
            StringRef origin_str =
3995
0
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
3996
0
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
3997
0
            const auto start = args_start[index_check_const<start_const>(row)];
3998
0
            const auto length = args_length[index_check_const<len_const>(row)];
3999
            //input is null, start < 0, len < 0 return NULL
4000
0
            if (args_null_map[row] || start < 0 || length < 0) {
4001
0
                res_offsets.push_back(res_chars.size());
4002
0
                args_null_map[row] = 1;
4003
0
                continue;
4004
0
            }
4005
4006
0
            const auto [start_byte_len, start_char_len] =
4007
0
                    simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(),
4008
0
                                                                           origin_str.end(), start);
4009
4010
            // start >= orgin.size
4011
0
            DCHECK(start_char_len <= start);
4012
0
            if (start_byte_len == origin_str.size) {
4013
0
                res_offsets.push_back(res_chars.size());
4014
0
                args_null_map[row] = 1;
4015
0
                continue;
4016
0
            }
4017
4018
0
            auto [end_byte_len, end_char_len] =
4019
0
                    simd::VStringFunctions::iterate_utf8_with_limit_length(
4020
0
                            origin_str.begin() + start_byte_len, origin_str.end(), length);
4021
0
            DCHECK(end_char_len <= length);
4022
0
            std::string_view replace_str = new_str.to_string_view();
4023
0
            std::string result = origin_str.to_string();
4024
0
            result.replace(start_byte_len, end_byte_len, replace_str);
4025
0
            result_column->insert_data(result.data(), result.length());
4026
0
        }
4027
0
    }
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb0EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
4028
};
4029
4030
struct SubReplaceThreeImpl {
4031
1
    static DataTypes get_variadic_argument_types() {
4032
1
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
4033
1
                std::make_shared<DataTypeInt32>()};
4034
1
    }
4035
4036
    static Status execute_impl(FunctionContext* context, Block& block,
4037
                               const ColumnNumbers& arguments, uint32_t result,
4038
0
                               size_t input_rows_count) {
4039
0
        auto params = ColumnInt32::create(input_rows_count);
4040
0
        auto& strlen_data = params->get_data();
4041
4042
0
        auto str_col =
4043
0
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
4044
0
        if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
4045
0
            str_col = nullable->get_nested_column_ptr();
4046
0
        }
4047
0
        const auto* str_column = assert_cast<const ColumnString*>(str_col.get());
4048
        // use utf8 len
4049
0
        for (int i = 0; i < input_rows_count; ++i) {
4050
0
            StringRef str_ref = str_column->get_data_at(i);
4051
0
            strlen_data[i] = simd::VStringFunctions::get_char_len(str_ref.data, str_ref.size);
4052
0
        }
4053
4054
0
        block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"});
4055
0
        ColumnNumbers temp_arguments = {arguments[0], arguments[1], arguments[2],
4056
0
                                        block.columns() - 1};
4057
0
        return SubReplaceImpl::replace_execute(block, temp_arguments, result, input_rows_count);
4058
0
    }
4059
};
4060
4061
struct SubReplaceFourImpl {
4062
1
    static DataTypes get_variadic_argument_types() {
4063
1
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
4064
1
                std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()};
4065
1
    }
4066
4067
    static Status execute_impl(FunctionContext* context, Block& block,
4068
                               const ColumnNumbers& arguments, uint32_t result,
4069
0
                               size_t input_rows_count) {
4070
0
        return SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count);
4071
0
    }
4072
};
4073
4074
class FunctionConvertTo : public IFunction {
4075
public:
4076
    static constexpr auto name = "convert_to";
4077
4078
2
    static FunctionPtr create() { return std::make_shared<FunctionConvertTo>(); }
4079
4080
1
    String get_name() const override { return name; }
4081
4082
0
    size_t get_number_of_arguments() const override { return 2; }
4083
4084
0
    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
4085
0
        return std::make_shared<DataTypeString>();
4086
0
    }
4087
4088
0
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
4089
0
        if (scope != FunctionContext::THREAD_LOCAL) {
4090
0
            return Status::OK();
4091
0
        }
4092
0
        if (!context->is_col_constant(1)) {
4093
0
            return Status::InvalidArgument(
4094
0
                    "character argument to convert function must be constant.");
4095
0
        }
4096
0
        const auto& character_data = context->get_constant_col(1)->column_ptr->get_data_at(0);
4097
0
        if (!iequal(character_data.to_string(), "gbk")) {
4098
0
            return Status::RuntimeError(
4099
0
                    "Illegal second argument column of function convert. now only support "
4100
0
                    "convert to character set of gbk");
4101
0
        }
4102
4103
0
        return Status::OK();
4104
0
    }
4105
4106
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4107
0
                        uint32_t result, size_t input_rows_count) const override {
4108
0
        ColumnPtr argument_column =
4109
0
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
4110
0
        const ColumnString* str_col = static_cast<const ColumnString*>(argument_column.get());
4111
0
        const auto& str_offset = str_col->get_offsets();
4112
0
        const auto& str_chars = str_col->get_chars();
4113
0
        auto col_res = ColumnString::create();
4114
0
        auto& res_offset = col_res->get_offsets();
4115
0
        auto& res_chars = col_res->get_chars();
4116
0
        res_offset.resize(input_rows_count);
4117
        // max pinyin size is 6 + 1 (first '~') for utf8 chinese word 3
4118
0
        size_t pinyin_size = (str_chars.size() + 2) / 3 * 7;
4119
0
        ColumnString::check_chars_length(pinyin_size, 0);
4120
0
        res_chars.resize(pinyin_size);
4121
4122
0
        size_t in_len = 0, out_len = 0;
4123
0
        for (int i = 0; i < input_rows_count; ++i) {
4124
0
            in_len = str_offset[i] - str_offset[i - 1];
4125
0
            const char* in = reinterpret_cast<const char*>(&str_chars[str_offset[i - 1]]);
4126
0
            char* out = reinterpret_cast<char*>(&res_chars[res_offset[i - 1]]);
4127
0
            _utf8_to_pinyin(in, in_len, out, &out_len);
4128
0
            res_offset[i] = res_offset[i - 1] + out_len;
4129
0
        }
4130
0
        res_chars.resize(res_offset[input_rows_count - 1]);
4131
0
        block.replace_by_position(result, std::move(col_res));
4132
0
        return Status::OK();
4133
0
    }
4134
4135
0
    void _utf8_to_pinyin(const char* in, size_t in_len, char* out, size_t* out_len) const {
4136
0
        auto do_memcpy = [](char*& dest, const char*& from, size_t size) {
4137
0
            memcpy_small_allow_read_write_overflow15(dest, from, size);
4138
0
            dest += size;
4139
0
            from += size;
4140
0
        };
4141
0
        auto from = in;
4142
0
        auto dest = out;
4143
4144
0
        while (from - in < in_len) {
4145
0
            auto length = get_utf8_byte_length(*from);
4146
0
            if (length != 3) {
4147
0
                do_memcpy(dest, from, length);
4148
0
            } else {
4149
                // convert utf8 to unicode code to get pinyin offset
4150
0
                if (auto tmp = (((int)(*from & 0x0F)) << 12) | (((int)(*(from + 1) & 0x3F)) << 6) |
4151
0
                               (*(from + 2) & 0x3F);
4152
0
                    tmp >= START_UNICODE_OFFSET and tmp < END_UNICODE_OFFSET) {
4153
0
                    const char* buf = nullptr;
4154
0
                    if (tmp >= START_UNICODE_OFFSET && tmp < MID_UNICODE_OFFSET) {
4155
0
                        buf = PINYIN_DICT1 + (tmp - START_UNICODE_OFFSET) * MAX_PINYIN_LEN;
4156
0
                    } else if (tmp >= MID_UNICODE_OFFSET && tmp < END_UNICODE_OFFSET) {
4157
0
                        buf = PINYIN_DICT2 + (tmp - MID_UNICODE_OFFSET) * MAX_PINYIN_LEN;
4158
0
                    }
4159
4160
0
                    auto end = strchr(buf, ' ');
4161
                    // max len for pinyin is 6
4162
0
                    int len = MAX_PINYIN_LEN;
4163
0
                    if (end != nullptr && end - buf < MAX_PINYIN_LEN) {
4164
0
                        len = end - buf;
4165
0
                    }
4166
                    // set first char '~' just make sure all english word lower than chinese word
4167
0
                    *dest = 126;
4168
0
                    memcpy(dest + 1, buf, len);
4169
0
                    dest += (len + 1);
4170
0
                    from += 3;
4171
0
                } else {
4172
0
                    do_memcpy(dest, from, 3);
4173
0
                }
4174
0
            }
4175
0
        }
4176
4177
0
        *out_len = dest - out;
4178
0
    }
4179
};
4180
4181
// refer to https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char
4182
//      UTF8
4183
// 多  0xe5, 0xa4, 0x9a  0xb6, 0xe0
4184
// 睿  0xe7, 0x9d, 0xbf  0xee, 0xa3
4185
// 丝  0xe4, 0xb8, 0x9d  0xcb, 0xbf 14989469
4186
// MySQL behaviour:
4187
// mysql> select char(0xe4, 0xb8, 0x9d using utf8);
4188
// +-----------------------------------+
4189
// | char(0xe4, 0xb8, 0x9d using utf8) |
4190
// +-----------------------------------+
4191
// | 丝                                |
4192
// +-----------------------------------+
4193
// 1 row in set, 1 warning (0.00 sec)
4194
// mysql> select char(14989469 using utf8);
4195
// +---------------------------+
4196
// | char(14989469 using utf8) |
4197
// +---------------------------+
4198
// | 丝                        |
4199
// +---------------------------+
4200
// 1 row in set, 1 warning (0.00 sec)
4201
// mysql> select char(0xe5, 0xa4, 0x9a, 0xe7, 0x9d, 0xbf, 0xe4, 0xb8, 0x9d, 68, 111, 114, 105, 115 using utf8);
4202
// +---------------------------------------------------------------------------------------------+
4203
// | char(0xe5, 0xa4, 0x9a, 0xe7, 0x9d, 0xbf, 0xe4, 0xb8, 0x9d, 68, 111, 114, 105, 115 using utf8) |
4204
// +---------------------------------------------------------------------------------------------+
4205
// | 多睿丝 Doris                                                                                 |
4206
// +---------------------------------------------------------------------------------------------+
4207
// mysql> select char(68, 111, 114, 0, 105, null, 115 using utf8);
4208
// +--------------------------------------------------+
4209
// | char(68, 111, 114, 0, 105, null, 115 using utf8) |
4210
// +--------------------------------------------------+
4211
// | Dor is                                           |
4212
// +--------------------------------------------------+
4213
4214
// return null:
4215
// mysql>  select char(255 using utf8);
4216
// +----------------------+
4217
// | char(255 using utf8) |
4218
// +----------------------+
4219
// | NULL                 |
4220
// +----------------------+
4221
// 1 row in set, 2 warnings (0.00 sec)
4222
//
4223
// mysql> show warnings;
4224
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4225
// | Level   | Code | Message                                                                                                                                                                     |
4226
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4227
// | Warning | 3719 | 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. |
4228
// | Warning | 1300 | Invalid utf8mb3 character string: 'FF'                                                                                                                                      |
4229
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4230
// 2 rows in set (0.01 sec)
4231
4232
// max int value:
4233
// mysql> select char(18446744073709551615);
4234
// +--------------------------------------------------------+
4235
// | char(18446744073709551615)                             |
4236
// +--------------------------------------------------------+
4237
// | 0xFFFFFFFF                                             |
4238
// +--------------------------------------------------------+
4239
// 1 row in set (0.00 sec)
4240
//
4241
// mysql> select char(18446744073709551616);
4242
// +--------------------------------------------------------+
4243
// | char(18446744073709551616)                             |
4244
// +--------------------------------------------------------+
4245
// | 0xFFFFFFFF                                             |
4246
// +--------------------------------------------------------+
4247
// 1 row in set, 1 warning (0.00 sec)
4248
//
4249
// mysql> show warnings;
4250
// +---------+------+-----------------------------------------------------------+
4251
// | Level   | Code | Message                                                   |
4252
// +---------+------+-----------------------------------------------------------+
4253
// | Warning | 1292 | Truncated incorrect DECIMAL value: '18446744073709551616' |
4254
// +---------+------+-----------------------------------------------------------+
4255
// 1 row in set (0.00 sec)
4256
4257
// table columns:
4258
// mysql> select * from t;
4259
// +------+------+------+
4260
// | f1   | f2   | f3   |
4261
// +------+------+------+
4262
// |  228 |  184 |  157 |
4263
// |  228 |  184 |    0 |
4264
// |  228 |  184 |   99 |
4265
// |   99 |  228 |  184 |
4266
// +------+------+------+
4267
// 4 rows in set (0.00 sec)
4268
//
4269
// mysql> select char(f1, f2, f3 using utf8) from t;
4270
// +-----------------------------+
4271
// | char(f1, f2, f3 using utf8) |
4272
// +-----------------------------+
4273
// | 丝                          |
4274
// |                             |
4275
// |                             |
4276
// | c                           |
4277
// +-----------------------------+
4278
// 4 rows in set, 4 warnings (0.00 sec)
4279
//
4280
// mysql> show warnings;
4281
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4282
// | Level   | Code | Message                                                                                                                                                                     |
4283
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4284
// | Warning | 3719 | 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. |
4285
// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B800'                                                                                                                                  |
4286
// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B863'                                                                                                                                  |
4287
// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B8'                                                                                                                                    |
4288
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4289
class FunctionIntToChar : public IFunction {
4290
public:
4291
    static constexpr auto name = "char";
4292
2
    static FunctionPtr create() { return std::make_shared<FunctionIntToChar>(); }
4293
0
    String get_name() const override { return name; }
4294
0
    size_t get_number_of_arguments() const override { return 0; }
4295
1
    bool is_variadic() const override { return true; }
4296
4297
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4298
0
        return make_nullable(std::make_shared<DataTypeString>());
4299
0
    }
4300
0
    bool use_default_implementation_for_nulls() const override { return false; }
4301
4302
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4303
0
                        uint32_t result, size_t input_rows_count) const override {
4304
0
        DCHECK_GE(arguments.size(), 2);
4305
4306
0
        int argument_size = arguments.size();
4307
0
        std::vector<ColumnPtr> str_columns(argument_size - 1);
4308
0
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size - 1);
4309
0
        std::vector<const ColumnString::Chars*> chars_list(argument_size - 1);
4310
4311
        // convert each argument columns to column string and then concat the string columns
4312
0
        for (size_t i = 1; i < argument_size; ++i) {
4313
0
            if (auto const_column = check_and_get_column<const ColumnConst>(
4314
0
                        *block.get_by_position(arguments[i]).column)) {
4315
                // ignore null
4316
0
                if (const_column->only_null()) {
4317
0
                    str_columns[i - 1] = nullptr;
4318
0
                } else {
4319
0
                    auto str_column = ColumnString::create();
4320
0
                    auto& chars = str_column->get_chars();
4321
0
                    auto& offsets = str_column->get_offsets();
4322
0
                    offsets.resize(1);
4323
0
                    const ColumnInt32* int_column;
4324
0
                    if (auto* nullable = check_and_get_column<const ColumnNullable>(
4325
0
                                const_column->get_data_column())) {
4326
0
                        int_column = assert_cast<const ColumnInt32*>(
4327
0
                                nullable->get_nested_column_ptr().get());
4328
0
                    } else {
4329
0
                        int_column =
4330
0
                                assert_cast<const ColumnInt32*>(&const_column->get_data_column());
4331
0
                    }
4332
0
                    int int_val = int_column->get_int(0);
4333
0
                    integer_to_char_(0, &int_val, chars, offsets);
4334
0
                    str_columns[i - 1] =
4335
0
                            ColumnConst::create(std::move(str_column), input_rows_count);
4336
0
                }
4337
0
                offsets_list[i - 1] = nullptr;
4338
0
                chars_list[i - 1] = nullptr;
4339
0
            } else {
4340
0
                auto str_column = ColumnString::create();
4341
0
                auto& chars = str_column->get_chars();
4342
0
                auto& offsets = str_column->get_offsets();
4343
                // data.resize(input_rows_count);
4344
0
                offsets.resize(input_rows_count);
4345
4346
0
                if (auto nullable = check_and_get_column<const ColumnNullable>(
4347
0
                            *block.get_by_position(arguments[i]).column)) {
4348
0
                    const auto* int_data =
4349
0
                            assert_cast<const ColumnInt32*>(nullable->get_nested_column_ptr().get())
4350
0
                                    ->get_data()
4351
0
                                    .data();
4352
0
                    const auto* null_map_data = nullable->get_null_map_data().data();
4353
0
                    for (size_t j = 0; j < input_rows_count; ++j) {
4354
                        // ignore null
4355
0
                        if (null_map_data[j]) {
4356
0
                            offsets[j] = offsets[j - 1];
4357
0
                        } else {
4358
0
                            integer_to_char_(j, int_data + j, chars, offsets);
4359
0
                        }
4360
0
                    }
4361
0
                } else {
4362
0
                    const auto* int_data = assert_cast<const ColumnInt32*>(
4363
0
                                                   block.get_by_position(arguments[i]).column.get())
4364
0
                                                   ->get_data()
4365
0
                                                   .data();
4366
0
                    for (size_t j = 0; j < input_rows_count; ++j) {
4367
0
                        integer_to_char_(j, int_data + j, chars, offsets);
4368
0
                    }
4369
0
                }
4370
0
                offsets_list[i - 1] = &str_column->get_offsets();
4371
0
                chars_list[i - 1] = &str_column->get_chars();
4372
0
                str_columns[i - 1] = std::move(str_column);
4373
0
            }
4374
0
        }
4375
4376
0
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
4377
0
        auto res = ColumnString::create();
4378
0
        auto& res_data = res->get_chars();
4379
0
        auto& res_offset = res->get_offsets();
4380
4381
0
        size_t res_reserve_size = 0;
4382
0
        for (size_t i = 0; i < argument_size - 1; ++i) {
4383
0
            if (!str_columns[i]) {
4384
0
                continue;
4385
0
            }
4386
0
            if (auto const_column = check_and_get_column<const ColumnConst>(*str_columns[i])) {
4387
0
                auto str_column =
4388
0
                        assert_cast<const ColumnString*>(&(const_column->get_data_column()));
4389
0
                auto& offsets = str_column->get_offsets();
4390
0
                res_reserve_size += (offsets[0] - offsets[-1]) * input_rows_count;
4391
0
            } else {
4392
0
                for (size_t j = 0; j < input_rows_count; ++j) {
4393
0
                    size_t append = (*offsets_list[i])[j] - (*offsets_list[i])[j - 1];
4394
                    // check whether the output might overflow(unlikely)
4395
0
                    if (UNLIKELY(UINT_MAX - append < res_reserve_size)) {
4396
0
                        return Status::BufferAllocFailed(
4397
0
                                "function char output is too large to allocate");
4398
0
                    }
4399
0
                    res_reserve_size += append;
4400
0
                }
4401
0
            }
4402
0
        }
4403
0
        if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) {
4404
0
            return Status::BufferAllocFailed("function char output is too large to allocate");
4405
0
        }
4406
0
        ColumnString::check_chars_length(res_reserve_size, 0);
4407
0
        res_data.resize(res_reserve_size);
4408
0
        res_offset.resize(input_rows_count);
4409
4410
0
        for (size_t i = 0; i < input_rows_count; ++i) {
4411
0
            int current_length = 0;
4412
0
            for (size_t j = 0; j < argument_size - 1; ++j) {
4413
0
                if (!str_columns[j]) {
4414
0
                    continue;
4415
0
                }
4416
0
                if (auto const_column = check_and_get_column<const ColumnConst>(*str_columns[j])) {
4417
0
                    auto str_column = assert_cast<const ColumnString*, TypeCheckOnRelease::DISABLE>(
4418
0
                            &(const_column->get_data_column()));
4419
0
                    auto data_item = str_column->get_data_at(0);
4420
0
                    memcpy_small_allow_read_write_overflow15(
4421
0
                            &res_data[res_offset[i - 1]] + current_length, data_item.data,
4422
0
                            data_item.size);
4423
0
                    current_length += data_item.size;
4424
0
                } else {
4425
0
                    auto& current_offsets = *offsets_list[j];
4426
0
                    auto& current_chars = *chars_list[j];
4427
4428
0
                    int size = current_offsets[i] - current_offsets[i - 1];
4429
0
                    if (size > 0) {
4430
0
                        memcpy_small_allow_read_write_overflow15(
4431
0
                                &res_data[res_offset[i - 1]] + current_length,
4432
0
                                &current_chars[current_offsets[i - 1]], size);
4433
0
                        current_length += size;
4434
0
                    }
4435
0
                }
4436
0
            }
4437
0
            res_offset[i] = res_offset[i - 1] + current_length;
4438
0
        }
4439
4440
        // validate utf8
4441
0
        auto* null_map_data = null_map->get_data().data();
4442
0
        for (size_t i = 0; i < input_rows_count; ++i) {
4443
0
            if (!validate_utf8((const char*)(&res_data[res_offset[i - 1]]),
4444
0
                               res_offset[i] - res_offset[i - 1])) {
4445
0
                null_map_data[i] = 1;
4446
0
            }
4447
0
        }
4448
4449
0
        block.get_by_position(result).column =
4450
0
                ColumnNullable::create(std::move(res), std::move(null_map));
4451
0
        return Status::OK();
4452
0
    }
4453
4454
private:
4455
    void integer_to_char_(int line_num, const int* num, ColumnString::Chars& chars,
4456
0
                          IColumn::Offsets& offsets) const {
4457
0
        if (0 == *num) {
4458
0
            chars.push_back('\0');
4459
0
            offsets[line_num] = offsets[line_num - 1] + 1;
4460
0
            return;
4461
0
        }
4462
0
        const char* bytes = (const char*)(num);
4463
0
        if constexpr (std::endian::native == std::endian::little) {
4464
0
            int k = 3;
4465
0
            for (; k >= 0; --k) {
4466
0
                if (bytes[k]) {
4467
0
                    break;
4468
0
                }
4469
0
            }
4470
0
            offsets[line_num] = offsets[line_num - 1] + k + 1;
4471
0
            for (; k >= 0; --k) {
4472
0
                chars.push_back(bytes[k] ? bytes[k] : '\0');
4473
0
            }
4474
        } else if constexpr (std::endian::native == std::endian::big) {
4475
            int k = 0;
4476
            for (; k < 4; ++k) {
4477
                if (bytes[k]) {
4478
                    break;
4479
                }
4480
            }
4481
            offsets[line_num] = offsets[line_num - 1] + 4 - k;
4482
            for (; k < 4; ++k) {
4483
                chars.push_back(bytes[k] ? bytes[k] : '\0');
4484
            }
4485
        } else {
4486
            static_assert(std::endian::native == std::endian::big ||
4487
                                  std::endian::native == std::endian::little,
4488
                          "Unsupported endianness");
4489
        }
4490
0
    }
4491
};
4492
4493
class FunctionOverlay : public IFunction {
4494
public:
4495
    static constexpr auto name = "overlay";
4496
19
    static FunctionPtr create() { return std::make_shared<FunctionOverlay>(); }
4497
1
    String get_name() const override { return name; }
4498
17
    size_t get_number_of_arguments() const override { return 4; }
4499
4500
17
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4501
17
        return std::make_shared<DataTypeString>();
4502
17
    }
4503
4504
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4505
13
                        uint32_t result, size_t input_rows_count) const override {
4506
13
        DCHECK_EQ(arguments.size(), 4);
4507
4508
13
        bool col_const[4];
4509
13
        ColumnPtr argument_columns[4];
4510
65
        for (int i = 0; i < 4; ++i) {
4511
52
            std::tie(argument_columns[i], col_const[i]) =
4512
52
                    unpack_if_const(block.get_by_position(arguments[i]).column);
4513
52
        }
4514
4515
13
        const auto* col_origin = assert_cast<const ColumnString*>(argument_columns[0].get());
4516
4517
13
        const auto* col_pos =
4518
13
                assert_cast<const ColumnInt32*>(argument_columns[1].get())->get_data().data();
4519
13
        const auto* col_len =
4520
13
                assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_data().data();
4521
13
        const auto* col_insert = assert_cast<const ColumnString*>(argument_columns[3].get());
4522
4523
13
        ColumnString::MutablePtr col_res = ColumnString::create();
4524
4525
        // if all input string is ascii, we can use ascii function to handle it
4526
13
        const bool is_all_ascii = col_origin->is_ascii() && col_insert->is_ascii();
4527
13
        std::visit(
4528
13
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4529
13
                    if (is_all_ascii) {
4530
6
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4531
6
                                col_origin, col_pos, col_len, col_insert, col_res,
4532
6
                                input_rows_count);
4533
7
                    } else {
4534
7
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4535
7
                                col_origin, col_pos, col_len, col_insert, col_res,
4536
7
                                input_rows_count);
4537
7
                    }
4538
13
                },
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SH_EEDaSA_SB_SC_SD_
Line
Count
Source
4528
1
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4529
1
                    if (is_all_ascii) {
4530
0
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4531
0
                                col_origin, col_pos, col_len, col_insert, col_res,
4532
0
                                input_rows_count);
4533
1
                    } else {
4534
1
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4535
1
                                col_origin, col_pos, col_len, col_insert, col_res,
4536
1
                                input_rows_count);
4537
1
                    }
4538
1
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SG_IbLb0EEEEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SI_EEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SI_EEDaSA_SB_SC_SD_
Line
Count
Source
4528
12
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4529
12
                    if (is_all_ascii) {
4530
6
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4531
6
                                col_origin, col_pos, col_len, col_insert, col_res,
4532
6
                                input_rows_count);
4533
6
                    } else {
4534
6
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4535
6
                                col_origin, col_pos, col_len, col_insert, col_res,
4536
6
                                input_rows_count);
4537
6
                    }
4538
12
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SG_IbLb1EEEEDaSA_SB_SC_SD_
4539
13
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
4540
13
                make_bool_variant(col_const[2]), make_bool_variant(col_const[3]));
4541
13
        block.replace_by_position(result, std::move(col_res));
4542
13
        return Status::OK();
4543
13
    }
4544
4545
private:
4546
    template <bool origin_const, bool pos_const, bool len_const, bool insert_const>
4547
    static void vector_ascii(const ColumnString* col_origin, int const* col_pos, int const* col_len,
4548
                             const ColumnString* col_insert, ColumnString::MutablePtr& col_res,
4549
6
                             size_t input_rows_count) {
4550
6
        auto& col_res_chars = col_res->get_chars();
4551
6
        auto& col_res_offsets = col_res->get_offsets();
4552
6
        StringRef origin_str, insert_str;
4553
12
        for (size_t i = 0; i < input_rows_count; i++) {
4554
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4555
            // pos is 1-based index,so we need to minus 1
4556
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4557
6
            const auto len = col_len[index_check_const<len_const>(i)];
4558
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4559
6
            const auto origin_size = origin_str.size;
4560
6
            if (pos >= origin_size || pos < 0) {
4561
                // If pos is not within the length of the string, the original string is returned.
4562
3
                col_res->insert_data(origin_str.data, origin_str.size);
4563
3
                continue;
4564
3
            }
4565
3
            col_res_chars.insert(origin_str.data,
4566
3
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
4567
3
            if (pos + len > origin_size || len < 0) {
4568
1
                col_res_chars.insert(insert_str.begin(),
4569
1
                                     insert_str.end()); // copy all of insert_str.
4570
2
            } else {
4571
2
                col_res_chars.insert(insert_str.begin(),
4572
2
                                     insert_str.end()); // copy all of insert_str.
4573
2
                col_res_chars.insert(
4574
2
                        origin_str.data + pos + len,
4575
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4576
2
            }
4577
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4578
3
            col_res_offsets.push_back(col_res_chars.size());
4579
3
        }
4580
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4549
6
                             size_t input_rows_count) {
4550
6
        auto& col_res_chars = col_res->get_chars();
4551
6
        auto& col_res_offsets = col_res->get_offsets();
4552
6
        StringRef origin_str, insert_str;
4553
12
        for (size_t i = 0; i < input_rows_count; i++) {
4554
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4555
            // pos is 1-based index,so we need to minus 1
4556
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4557
6
            const auto len = col_len[index_check_const<len_const>(i)];
4558
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4559
6
            const auto origin_size = origin_str.size;
4560
6
            if (pos >= origin_size || pos < 0) {
4561
                // If pos is not within the length of the string, the original string is returned.
4562
3
                col_res->insert_data(origin_str.data, origin_str.size);
4563
3
                continue;
4564
3
            }
4565
3
            col_res_chars.insert(origin_str.data,
4566
3
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
4567
3
            if (pos + len > origin_size || len < 0) {
4568
1
                col_res_chars.insert(insert_str.begin(),
4569
1
                                     insert_str.end()); // copy all of insert_str.
4570
2
            } else {
4571
2
                col_res_chars.insert(insert_str.begin(),
4572
2
                                     insert_str.end()); // copy all of insert_str.
4573
2
                col_res_chars.insert(
4574
2
                        origin_str.data + pos + len,
4575
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4576
2
            }
4577
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4578
3
            col_res_offsets.push_back(col_res_chars.size());
4579
3
        }
4580
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
4581
4582
    template <bool origin_const, bool pos_const, bool len_const, bool insert_const>
4583
    NO_SANITIZE_UNDEFINED static void vector_utf8(const ColumnString* col_origin,
4584
                                                  int const* col_pos, int const* col_len,
4585
                                                  const ColumnString* col_insert,
4586
                                                  ColumnString::MutablePtr& col_res,
4587
7
                                                  size_t input_rows_count) {
4588
7
        auto& col_res_chars = col_res->get_chars();
4589
7
        auto& col_res_offsets = col_res->get_offsets();
4590
7
        StringRef origin_str, insert_str;
4591
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
4592
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
4593
7
        std::vector<size_t> utf8_origin_offsets;
4594
29
        for (size_t i = 0; i < input_rows_count; i++) {
4595
22
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4596
            // pos is 1-based index,so we need to minus 1
4597
22
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4598
22
            const auto len = col_len[index_check_const<len_const>(i)];
4599
22
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4600
22
            utf8_origin_offsets.clear();
4601
4602
160
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
4603
138
                utf8_origin_offsets.push_back(ni);
4604
138
                char_size = get_utf8_byte_length(origin_str.data[ni]);
4605
138
            }
4606
4607
22
            const size_t utf8_origin_size = utf8_origin_offsets.size();
4608
4609
22
            if (pos >= utf8_origin_size || pos < 0) {
4610
                // If pos is not within the length of the string, the original string is returned.
4611
13
                col_res->insert_data(origin_str.data, origin_str.size);
4612
13
                continue;
4613
13
            }
4614
9
            col_res_chars.insert(
4615
9
                    origin_str.data,
4616
9
                    origin_str.data +
4617
9
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
4618
9
            if (pos + len >= utf8_origin_size || len < 0) {
4619
4
                col_res_chars.insert(insert_str.begin(),
4620
4
                                     insert_str.end()); // copy all of insert_str.
4621
5
            } else {
4622
5
                col_res_chars.insert(insert_str.begin(),
4623
5
                                     insert_str.end()); // copy all of insert_str.
4624
5
                col_res_chars.insert(
4625
5
                        origin_str.data + utf8_origin_offsets[pos + len],
4626
5
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4627
5
            }
4628
9
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4629
9
            col_res_offsets.push_back(col_res_chars.size());
4630
9
        }
4631
7
    }
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4587
1
                                                  size_t input_rows_count) {
4588
1
        auto& col_res_chars = col_res->get_chars();
4589
1
        auto& col_res_offsets = col_res->get_offsets();
4590
1
        StringRef origin_str, insert_str;
4591
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
4592
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
4593
1
        std::vector<size_t> utf8_origin_offsets;
4594
17
        for (size_t i = 0; i < input_rows_count; i++) {
4595
16
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4596
            // pos is 1-based index,so we need to minus 1
4597
16
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4598
16
            const auto len = col_len[index_check_const<len_const>(i)];
4599
16
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4600
16
            utf8_origin_offsets.clear();
4601
4602
116
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
4603
100
                utf8_origin_offsets.push_back(ni);
4604
100
                char_size = get_utf8_byte_length(origin_str.data[ni]);
4605
100
            }
4606
4607
16
            const size_t utf8_origin_size = utf8_origin_offsets.size();
4608
4609
16
            if (pos >= utf8_origin_size || pos < 0) {
4610
                // If pos is not within the length of the string, the original string is returned.
4611
10
                col_res->insert_data(origin_str.data, origin_str.size);
4612
10
                continue;
4613
10
            }
4614
6
            col_res_chars.insert(
4615
6
                    origin_str.data,
4616
6
                    origin_str.data +
4617
6
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
4618
6
            if (pos + len >= utf8_origin_size || len < 0) {
4619
3
                col_res_chars.insert(insert_str.begin(),
4620
3
                                     insert_str.end()); // copy all of insert_str.
4621
3
            } else {
4622
3
                col_res_chars.insert(insert_str.begin(),
4623
3
                                     insert_str.end()); // copy all of insert_str.
4624
3
                col_res_chars.insert(
4625
3
                        origin_str.data + utf8_origin_offsets[pos + len],
4626
3
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4627
3
            }
4628
6
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4629
6
            col_res_offsets.push_back(col_res_chars.size());
4630
6
        }
4631
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4587
6
                                                  size_t input_rows_count) {
4588
6
        auto& col_res_chars = col_res->get_chars();
4589
6
        auto& col_res_offsets = col_res->get_offsets();
4590
6
        StringRef origin_str, insert_str;
4591
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
4592
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
4593
6
        std::vector<size_t> utf8_origin_offsets;
4594
12
        for (size_t i = 0; i < input_rows_count; i++) {
4595
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4596
            // pos is 1-based index,so we need to minus 1
4597
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4598
6
            const auto len = col_len[index_check_const<len_const>(i)];
4599
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4600
6
            utf8_origin_offsets.clear();
4601
4602
44
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
4603
38
                utf8_origin_offsets.push_back(ni);
4604
38
                char_size = get_utf8_byte_length(origin_str.data[ni]);
4605
38
            }
4606
4607
6
            const size_t utf8_origin_size = utf8_origin_offsets.size();
4608
4609
6
            if (pos >= utf8_origin_size || pos < 0) {
4610
                // If pos is not within the length of the string, the original string is returned.
4611
3
                col_res->insert_data(origin_str.data, origin_str.size);
4612
3
                continue;
4613
3
            }
4614
3
            col_res_chars.insert(
4615
3
                    origin_str.data,
4616
3
                    origin_str.data +
4617
3
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
4618
3
            if (pos + len >= utf8_origin_size || len < 0) {
4619
1
                col_res_chars.insert(insert_str.begin(),
4620
1
                                     insert_str.end()); // copy all of insert_str.
4621
2
            } else {
4622
2
                col_res_chars.insert(insert_str.begin(),
4623
2
                                     insert_str.end()); // copy all of insert_str.
4624
2
                col_res_chars.insert(
4625
2
                        origin_str.data + utf8_origin_offsets[pos + len],
4626
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4627
2
            }
4628
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4629
3
            col_res_offsets.push_back(col_res_chars.size());
4630
3
        }
4631
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
4632
};
4633
4634
class FunctionNgramSearch : public IFunction {
4635
public:
4636
    static constexpr auto name = "ngram_search";
4637
2
    static FunctionPtr create() { return std::make_shared<FunctionNgramSearch>(); }
4638
1
    String get_name() const override { return name; }
4639
0
    size_t get_number_of_arguments() const override { return 3; }
4640
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4641
0
        return std::make_shared<DataTypeFloat64>();
4642
0
    }
4643
4644
    // ngram_search(text,pattern,gram_num)
4645
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4646
0
                        uint32_t result, size_t input_rows_count) const override {
4647
0
        CHECK_EQ(arguments.size(), 3);
4648
0
        auto col_res = ColumnFloat64::create();
4649
0
        bool col_const[3];
4650
0
        ColumnPtr argument_columns[3];
4651
0
        for (int i = 0; i < 3; ++i) {
4652
0
            std::tie(argument_columns[i], col_const[i]) =
4653
0
                    unpack_if_const(block.get_by_position(arguments[i]).column);
4654
0
        }
4655
        // There is no need to check if the 2-th,3-th parameters are const here because fe has already checked them.
4656
0
        auto pattern = assert_cast<const ColumnString*>(argument_columns[1].get())->get_data_at(0);
4657
0
        auto gram_num = assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_element(0);
4658
0
        const auto* text_col = assert_cast<const ColumnString*>(argument_columns[0].get());
4659
4660
0
        if (col_const[0]) {
4661
0
            _execute_impl<true>(text_col, pattern, gram_num, *col_res, input_rows_count);
4662
0
        } else {
4663
0
            _execute_impl<false>(text_col, pattern, gram_num, *col_res, input_rows_count);
4664
0
        }
4665
4666
0
        block.replace_by_position(result, std::move(col_res));
4667
0
        return Status::OK();
4668
0
    }
4669
4670
private:
4671
    using NgramMap = phmap::flat_hash_map<uint32_t, uint8_t>;
4672
    // In the map, the key is the CRC32 hash result of a substring in the string,
4673
    // and the value indicates whether this hash is found in the text or pattern.
4674
    constexpr static auto not_found = 0b00;
4675
    constexpr static auto found_in_pattern = 0b01;
4676
    constexpr static auto found_in_text = 0b10;
4677
    constexpr static auto found_in_pattern_and_text = 0b11;
4678
4679
0
    uint32_t sub_str_hash(const char* data, int32_t length) const {
4680
0
        constexpr static uint32_t seed = 0;
4681
0
        return crc32c::Extend(seed, (const uint8_t*)data, length);
4682
0
    }
4683
4684
    template <bool column_const>
4685
    void _execute_impl(const ColumnString* text_col, StringRef& pattern, int gram_num,
4686
0
                       ColumnFloat64& res, size_t size) const {
4687
0
        auto& res_data = res.get_data();
4688
0
        res_data.resize_fill(size, 0);
4689
        // If the length of the pattern is less than gram_num, return 0.
4690
0
        if (pattern.size < gram_num) {
4691
0
            return;
4692
0
        }
4693
4694
        // Build a map by pattern string, which will be used repeatedly in the following loop.
4695
0
        NgramMap pattern_map;
4696
0
        int pattern_count = get_pattern_set(pattern_map, pattern, gram_num);
4697
        // Each time a loop is executed, the map will be modified, so it needs to be restored afterward.
4698
0
        std::vector<uint32_t> restore_map;
4699
4700
0
        for (int i = 0; i < size; i++) {
4701
0
            auto text = text_col->get_data_at(index_check_const<column_const>(i));
4702
0
            if (text.size < gram_num) {
4703
                // If the length of the text is less than gram_num, return 0.
4704
0
                continue;
4705
0
            }
4706
0
            restore_map.reserve(text.size);
4707
0
            auto [text_count, intersection_count] =
4708
0
                    get_text_set(text, gram_num, pattern_map, restore_map);
4709
4710
            // 2 * |Intersection| / (|text substr set| + |pattern substr set|)
4711
0
            res_data[i] = 2.0 * intersection_count / (text_count + pattern_count);
4712
0
        }
4713
0
    }
Unexecuted instantiation: _ZNK5doris19FunctionNgramSearch13_execute_implILb1EEEvPKNS_9ColumnStrIjEERNS_9StringRefEiRNS_12ColumnVectorILNS_13PrimitiveTypeE9EEEm
Unexecuted instantiation: _ZNK5doris19FunctionNgramSearch13_execute_implILb0EEEvPKNS_9ColumnStrIjEERNS_9StringRefEiRNS_12ColumnVectorILNS_13PrimitiveTypeE9EEEm
4714
4715
0
    size_t get_pattern_set(NgramMap& pattern_map, StringRef& pattern, int gram_num) const {
4716
0
        size_t pattern_count = 0;
4717
0
        for (int i = 0; i + gram_num <= pattern.size; i++) {
4718
0
            uint32_t cur_hash = sub_str_hash(pattern.data + i, gram_num);
4719
0
            if (!pattern_map.contains(cur_hash)) {
4720
0
                pattern_map[cur_hash] = found_in_pattern;
4721
0
                pattern_count++;
4722
0
            }
4723
0
        }
4724
0
        return pattern_count;
4725
0
    }
4726
4727
    std::pair<size_t, size_t> get_text_set(StringRef& text, int gram_num, NgramMap& pattern_map,
4728
0
                                           std::vector<uint32_t>& restore_map) const {
4729
0
        restore_map.clear();
4730
        //intersection_count indicates a substring both in pattern and text.
4731
0
        size_t text_count = 0, intersection_count = 0;
4732
0
        for (int i = 0; i + gram_num <= text.size; i++) {
4733
0
            uint32_t cur_hash = sub_str_hash(text.data + i, gram_num);
4734
0
            auto& val = pattern_map[cur_hash];
4735
0
            if (val == not_found) {
4736
0
                val ^= found_in_text;
4737
0
                DCHECK(val == found_in_text);
4738
                // only found in text
4739
0
                text_count++;
4740
0
                restore_map.push_back(cur_hash);
4741
0
            } else if (val == found_in_pattern) {
4742
0
                val ^= found_in_text;
4743
0
                DCHECK(val == found_in_pattern_and_text);
4744
                // found in text and pattern
4745
0
                text_count++;
4746
0
                intersection_count++;
4747
0
                restore_map.push_back(cur_hash);
4748
0
            }
4749
0
        }
4750
        // Restore the pattern_map.
4751
0
        for (auto& restore_hash : restore_map) {
4752
0
            pattern_map[restore_hash] ^= found_in_text;
4753
0
        }
4754
4755
0
        return {text_count, intersection_count};
4756
0
    }
4757
};
4758
4759
class FunctionTranslate : public IFunction {
4760
public:
4761
    static constexpr auto name = "translate";
4762
    using AsciiMap = std::array<UInt8, 128>;
4763
    constexpr static UInt8 DELETE_CHAR = 255; // 255 means delete this char
4764
2
    static FunctionPtr create() { return std::make_shared<FunctionTranslate>(); }
4765
1
    String get_name() const override { return name; }
4766
0
    size_t get_number_of_arguments() const override { return 3; }
4767
4768
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4769
0
        return std::make_shared<DataTypeString>();
4770
0
    };
4771
4772
1
    DataTypes get_variadic_argument_types_impl() const override {
4773
1
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
4774
1
                std::make_shared<DataTypeString>()};
4775
1
    }
4776
4777
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4778
0
                        uint32_t result, size_t input_rows_count) const override {
4779
0
        CHECK_EQ(arguments.size(), 3);
4780
0
        auto col_res = ColumnString::create();
4781
0
        bool col_const[3];
4782
0
        ColumnPtr argument_columns[3];
4783
0
        for (int i = 0; i < 3; ++i) {
4784
0
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
4785
0
        }
4786
0
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
4787
0
                                                     *block.get_by_position(arguments[0]).column)
4788
0
                                                     .convert_to_full_column()
4789
0
                                           : block.get_by_position(arguments[0]).column;
4790
0
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
4791
4792
0
        const auto* col_source = assert_cast<const ColumnString*>(argument_columns[0].get());
4793
0
        const auto* col_from = assert_cast<const ColumnString*>(argument_columns[1].get());
4794
0
        const auto* col_to = assert_cast<const ColumnString*>(argument_columns[2].get());
4795
4796
0
        bool is_ascii = col_source->is_ascii() && col_from->is_ascii() && col_to->is_ascii();
4797
0
        auto impl_vectors = impl_vectors_utf8<false>;
4798
0
        if (col_const[1] && col_const[2] && is_ascii) {
4799
0
            impl_vectors = impl_vectors_ascii<true>;
4800
0
        } else if (col_const[1] && col_const[2]) {
4801
0
            impl_vectors = impl_vectors_utf8<true>;
4802
0
        } else if (is_ascii) {
4803
0
            impl_vectors = impl_vectors_ascii<false>;
4804
0
        }
4805
0
        impl_vectors(col_source, col_from, col_to, col_res.get());
4806
0
        block.get_by_position(result).column = std::move(col_res);
4807
0
        return Status::OK();
4808
0
    }
4809
4810
private:
4811
    template <bool IsConst>
4812
    static void impl_vectors_ascii(const ColumnString* col_source, const ColumnString* col_from,
4813
0
                                   const ColumnString* col_to, ColumnString* col_res) {
4814
0
        auto& res_chars = col_res->get_chars();
4815
0
        auto& res_offsets = col_res->get_offsets();
4816
0
        res_chars.reserve(col_source->get_chars().size());
4817
0
        res_offsets.reserve(col_source->get_offsets().size());
4818
0
        DCHECK_EQ(col_res->size(), 0);
4819
0
        AsciiMap map;
4820
0
        if (IsConst) {
4821
0
            const auto& from_str = col_from->get_data_at(0);
4822
0
            const auto& to_str = col_to->get_data_at(0);
4823
0
            if (!build_translate_map_ascii(map, from_str, to_str)) {
4824
                // if the map is not need delete char, we can directly copy the source string,then use map to translate
4825
0
                res_offsets.insert(col_source->get_offsets().begin(),
4826
0
                                   col_source->get_offsets().end());
4827
0
                res_chars.insert(col_source->get_chars().begin(), col_source->get_chars().end());
4828
0
                for (int i = 0; i < res_chars.size(); ++i) {
4829
0
                    res_chars[i] = map[res_chars[i]]; // translate the chars
4830
0
                }
4831
0
                return; // no need to translate
4832
0
            }
4833
0
        }
4834
4835
0
        auto res_size = 0;
4836
0
        auto* begin_data = col_res->get_chars().data();
4837
0
        for (size_t i = 0; i < col_source->size(); ++i) {
4838
0
            const auto& source_str = col_source->get_data_at(i);
4839
0
            if (!IsConst) {
4840
0
                const auto& from_str = col_from->get_data_at(i);
4841
0
                const auto& to_str = col_to->get_data_at(i);
4842
0
                build_translate_map_ascii(map, from_str, to_str);
4843
0
            }
4844
0
            auto* dst_data = begin_data + res_size;
4845
0
            res_size += translate_ascii(source_str, map, dst_data);
4846
4847
0
            res_offsets.push_back(res_size);
4848
0
        }
4849
0
        DCHECK_GE(res_chars.capacity(), res_size);
4850
0
        res_chars.resize(res_size);
4851
0
    }
Unexecuted instantiation: _ZN5doris17FunctionTranslate18impl_vectors_asciiILb1EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
Unexecuted instantiation: _ZN5doris17FunctionTranslate18impl_vectors_asciiILb0EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
4852
4853
    // return true if no need delete char
4854
    bool static build_translate_map_ascii(AsciiMap& map, const StringRef& from_str,
4855
0
                                          const StringRef& to_str) {
4856
0
        for (size_t i = 0; i < map.size(); ++i) {
4857
0
            map[i] = i; // initialize map to identity
4858
0
        }
4859
0
        std::array<UInt8, 128> set_map {0};
4860
0
        const auto min_size = std::min(from_str.size, to_str.size);
4861
        // all ascii characters are in the range [0, 127]
4862
0
        for (size_t i = 0; i < min_size; ++i) {
4863
0
            auto from_char = from_str.data[i];
4864
0
            auto to_char = to_str.data[i];
4865
0
            if (set_map[from_char] == 0) {
4866
0
                set_map[from_char] = 1;
4867
0
                map[from_char] = to_char;
4868
0
            }
4869
0
        }
4870
4871
0
        bool need_delete_char = false;
4872
4873
0
        for (size_t i = min_size; i < from_str.size; ++i) {
4874
0
            auto from_char = from_str.data[i];
4875
0
            if (set_map[from_char] == 0) {
4876
0
                set_map[from_char] = 1;
4877
0
                map[from_char] = DELETE_CHAR; // delete this char
4878
0
                need_delete_char = true;
4879
0
            }
4880
0
        }
4881
0
        return need_delete_char;
4882
0
    }
4883
4884
0
    static size_t translate_ascii(const StringRef& source_str, AsciiMap& map, UInt8* dst_data) {
4885
0
        auto* begin_data = dst_data;
4886
0
        for (size_t i = 0; i < source_str.size; ++i) {
4887
0
            auto c = source_str.data[i];
4888
0
            if (map[c] == DELETE_CHAR) {
4889
0
                continue; // delete this char
4890
0
            }
4891
0
            *dst_data++ = map[c];
4892
0
        }
4893
0
        return dst_data - begin_data;
4894
0
    }
4895
4896
    template <bool IsConst>
4897
    static void impl_vectors_utf8(const ColumnString* col_source, const ColumnString* col_from,
4898
0
                                  const ColumnString* col_to, ColumnString* col_res) {
4899
0
        col_res->get_chars().reserve(col_source->get_chars().size());
4900
0
        col_res->get_offsets().reserve(col_source->get_offsets().size());
4901
0
        std::unordered_map<std::string_view, std::string_view> translate_map;
4902
0
        if (IsConst) {
4903
0
            const auto& from_str = col_from->get_data_at(0);
4904
0
            const auto& to_str = col_to->get_data_at(0);
4905
0
            translate_map =
4906
0
                    build_translate_map_utf8(from_str.to_string_view(), to_str.to_string_view());
4907
0
        }
4908
0
        for (size_t i = 0; i < col_source->size(); ++i) {
4909
0
            const auto& source_str = col_source->get_data_at(i);
4910
0
            if (!IsConst) {
4911
0
                const auto& from_str = col_from->get_data_at(i);
4912
0
                const auto& to_str = col_to->get_data_at(i);
4913
0
                translate_map = build_translate_map_utf8(from_str.to_string_view(),
4914
0
                                                         to_str.to_string_view());
4915
0
            }
4916
0
            auto translated_str = translate_utf8(source_str.to_string_view(), translate_map);
4917
0
            col_res->insert_data(translated_str.data(), translated_str.size());
4918
0
        }
4919
0
    }
Unexecuted instantiation: _ZN5doris17FunctionTranslate17impl_vectors_utf8ILb0EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
Unexecuted instantiation: _ZN5doris17FunctionTranslate17impl_vectors_utf8ILb1EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
4920
4921
    static std::unordered_map<std::string_view, std::string_view> build_translate_map_utf8(
4922
0
            const std::string_view& from_str, const std::string_view& to_str) {
4923
0
        std::unordered_map<std::string_view, std::string_view> translate_map;
4924
0
        for (size_t i = 0, from_char_size = 0, j = 0, to_char_size = 0; i < from_str.size();
4925
0
             i += from_char_size, j += to_char_size) {
4926
0
            from_char_size = get_utf8_byte_length(from_str[i]);
4927
0
            to_char_size = j < to_str.size() ? get_utf8_byte_length(to_str[j]) : 0;
4928
0
            auto from_char = from_str.substr(i, from_char_size);
4929
0
            if (translate_map.find(from_char) == translate_map.end()) {
4930
0
                translate_map[from_char] =
4931
0
                        j < to_str.size() ? to_str.substr(j, to_char_size) : std::string_view();
4932
0
            }
4933
0
        }
4934
0
        return translate_map;
4935
0
    }
4936
4937
    static std::string translate_utf8(
4938
            const std::string_view& source_str,
4939
0
            std::unordered_map<std::string_view, std::string_view>& translate_map) {
4940
0
        std::string result;
4941
0
        result.reserve(source_str.size());
4942
0
        for (size_t i = 0, char_size = 0; i < source_str.size(); i += char_size) {
4943
0
            char_size = get_utf8_byte_length(source_str[i]);
4944
0
            auto c = source_str.substr(i, char_size);
4945
0
            if (translate_map.find(c) != translate_map.end()) {
4946
0
                if (!translate_map[c].empty()) {
4947
0
                    result.append(translate_map[c]);
4948
0
                }
4949
0
            } else {
4950
0
                result.append(c);
4951
0
            }
4952
0
        }
4953
0
        return result;
4954
0
    }
4955
};
4956
4957
/// xpath_string(xml, xpath) -> String
/// Returns the text content of the first node that matches the XPath expression.
/// Returns NULL if either xml or xpath is NULL or an empty string.
/// Returns an empty string if the XPath expression matches no nodes.
/// Fails with an InvalidArgument error if the XML cannot be parsed or the XPath expression
/// cannot be compiled or evaluated.
/// The text content includes the node and all its descendants.
/// Examples:
///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[1]') = 'b1'
///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[2]') = 'b2'
///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/c') = ''
///   xpath_string('invalid xml', '/a/b[1]') -> InvalidArgument error when the XML parser
///       rejects the document (parse failures are not mapped to NULL by this function)
///   xpath_string(NULL, '/a/b[1]') = NULL
///   xpath_string('<a><b>b1</b><b>b2</b></a>', NULL) = NULL
4969
class FunctionXPathString : public IFunction {
4970
public:
4971
    static constexpr auto name = "xpath_string";
4972
78
    static FunctionPtr create() { return std::make_shared<FunctionXPathString>(); }
4973
1
    String get_name() const override { return name; }
4974
76
    size_t get_number_of_arguments() const override { return 2; }
4975
76
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4976
76
        return make_nullable(std::make_shared<DataTypeString>());
4977
76
    }
4978
4979
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4980
67
                        uint32_t result, size_t input_rows_count) const override {
4981
67
        CHECK_EQ(arguments.size(), 2);
4982
67
        auto col_res = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create());
4983
67
        const auto& [left_col, left_const] =
4984
67
                unpack_if_const(block.get_by_position(arguments[0]).column);
4985
67
        const auto& [right_col, right_const] =
4986
67
                unpack_if_const(block.get_by_position(arguments[1]).column);
4987
67
        const auto& xml_col = *assert_cast<const ColumnString*>(left_col.get());
4988
67
        const auto& xpath_col = *assert_cast<const ColumnString*>(right_col.get());
4989
4990
67
        Status status;
4991
67
        if (left_const && right_const) {
4992
0
            status = execute_vector<true, true>(input_rows_count, xml_col, xpath_col, *col_res);
4993
67
        } else if (left_const) {
4994
22
            status = execute_vector<true, false>(input_rows_count, xml_col, xpath_col, *col_res);
4995
45
        } else if (right_const) {
4996
22
            status = execute_vector<false, true>(input_rows_count, xml_col, xpath_col, *col_res);
4997
23
        } else {
4998
23
            status = execute_vector<false, false>(input_rows_count, xml_col, xpath_col, *col_res);
4999
23
        }
5000
67
        if (!status.ok()) {
5001
0
            return status;
5002
0
        }
5003
5004
67
        block.get_by_position(result).column = std::move(col_res);
5005
67
        return Status::OK();
5006
67
    }
5007
5008
private:
5009
81
    static Status parse_xml(const StringRef& xml_str, pugi::xml_document& xml_doc) {
5010
81
        pugi::xml_parse_result result = xml_doc.load_buffer(xml_str.data, xml_str.size);
5011
81
        if (!result) {
5012
0
            return Status::InvalidArgument("Function {} failed to parse XML string: {}", name,
5013
0
                                           result.description());
5014
0
        }
5015
81
        return Status::OK();
5016
81
    }
5017
5018
84
    static Status build_xpath_query(const StringRef& xpath_str, pugi::xpath_query& xpath_query) {
5019
        // xpath_query will throws xpath_exception on compilation errors.
5020
84
        try {
5021
            // NOTE!!!: don't use to_string_view(), because xpath_str maybe not null-terminated
5022
84
            xpath_query = pugi::xpath_query(xpath_str.to_string().c_str());
5023
84
        } catch (const pugi::xpath_exception& e) {
5024
0
            return Status::InvalidArgument("Function {} failed to build XPath query: {}", name,
5025
0
                                           e.what());
5026
0
        }
5027
84
        return Status::OK();
5028
84
    }
5029
5030
    template <bool left_const, bool right_const>
5031
    static Status execute_vector(const size_t input_rows_count, const ColumnString& xml_col,
5032
67
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
5033
67
        pugi::xml_document xml_doc;
5034
67
        pugi::xpath_query xpath_query;
5035
        // first check right_const, because we want to check empty input first
5036
67
        if constexpr (right_const) {
5037
22
            auto xpath_str = xpath_col.get_data_at(0);
5038
22
            if (xpath_str.empty()) {
5039
                // should return null if xpath_str is empty
5040
1
                res_col.insert_many_defaults(input_rows_count);
5041
1
                return Status::OK();
5042
1
            }
5043
21
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
5044
21
        }
5045
22
        if constexpr (left_const) {
5046
22
            auto xml_str = xml_col.get_data_at(0);
5047
22
            if (xml_str.empty()) {
5048
                // should return null if xml_str is empty
5049
1
                res_col.insert_many_defaults(input_rows_count);
5050
1
                return Status::OK();
5051
1
            }
5052
21
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
5053
21
        }
5054
5055
156
        for (size_t i = 0; i < input_rows_count; ++i) {
5056
89
            if constexpr (!right_const) {
5057
68
                auto xpath_str = xpath_col.get_data_at(i);
5058
68
                if (xpath_str.empty()) {
5059
                    // should return null if xpath_str is empty
5060
5
                    res_col.insert_default();
5061
5
                    continue;
5062
5
                }
5063
63
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
5064
63
            }
5065
68
            if constexpr (!left_const) {
5066
68
                auto xml_str = xml_col.get_data_at(i);
5067
68
                if (xml_str.empty()) {
5068
                    // should return null if xml_str is empty
5069
4
                    res_col.insert_default();
5070
4
                    continue;
5071
4
                }
5072
64
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
5073
64
            }
5074
64
            std::string text;
5075
89
            try {
5076
89
                text = xpath_query.evaluate_string(xml_doc);
5077
89
            } catch (const pugi::xpath_exception& e) {
5078
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
5079
0
                                               e.what());
5080
0
            }
5081
80
            res_col.insert_data(text.data(), text.size());
5082
80
        }
5083
67
        return Status::OK();
5084
67
    }
Unexecuted instantiation: _ZN5doris19FunctionXPathString14execute_vectorILb1ELb1EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
_ZN5doris19FunctionXPathString14execute_vectorILb1ELb0EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
Line
Count
Source
5032
22
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
5033
22
        pugi::xml_document xml_doc;
5034
22
        pugi::xpath_query xpath_query;
5035
        // first check right_const, because we want to check empty input first
5036
        if constexpr (right_const) {
5037
            auto xpath_str = xpath_col.get_data_at(0);
5038
            if (xpath_str.empty()) {
5039
                // should return null if xpath_str is empty
5040
                res_col.insert_many_defaults(input_rows_count);
5041
                return Status::OK();
5042
            }
5043
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
5044
        }
5045
22
        if constexpr (left_const) {
5046
22
            auto xml_str = xml_col.get_data_at(0);
5047
22
            if (xml_str.empty()) {
5048
                // should return null if xml_str is empty
5049
1
                res_col.insert_many_defaults(input_rows_count);
5050
1
                return Status::OK();
5051
1
            }
5052
21
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
5053
21
        }
5054
5055
43
        for (size_t i = 0; i < input_rows_count; ++i) {
5056
21
            if constexpr (!right_const) {
5057
21
                auto xpath_str = xpath_col.get_data_at(i);
5058
21
                if (xpath_str.empty()) {
5059
                    // should return null if xpath_str is empty
5060
1
                    res_col.insert_default();
5061
1
                    continue;
5062
1
                }
5063
20
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
5064
20
            }
5065
            if constexpr (!left_const) {
5066
                auto xml_str = xml_col.get_data_at(i);
5067
                if (xml_str.empty()) {
5068
                    // should return null if xml_str is empty
5069
                    res_col.insert_default();
5070
                    continue;
5071
                }
5072
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
5073
            }
5074
21
            std::string text;
5075
21
            try {
5076
21
                text = xpath_query.evaluate_string(xml_doc);
5077
21
            } catch (const pugi::xpath_exception& e) {
5078
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
5079
0
                                               e.what());
5080
0
            }
5081
20
            res_col.insert_data(text.data(), text.size());
5082
20
        }
5083
22
        return Status::OK();
5084
22
    }
_ZN5doris19FunctionXPathString14execute_vectorILb0ELb1EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
Line
Count
Source
5032
22
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
5033
22
        pugi::xml_document xml_doc;
5034
22
        pugi::xpath_query xpath_query;
5035
        // first check right_const, because we want to check empty input first
5036
22
        if constexpr (right_const) {
5037
22
            auto xpath_str = xpath_col.get_data_at(0);
5038
22
            if (xpath_str.empty()) {
5039
                // should return null if xpath_str is empty
5040
1
                res_col.insert_many_defaults(input_rows_count);
5041
1
                return Status::OK();
5042
1
            }
5043
21
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
5044
21
        }
5045
        if constexpr (left_const) {
5046
            auto xml_str = xml_col.get_data_at(0);
5047
            if (xml_str.empty()) {
5048
                // should return null if xml_str is empty
5049
                res_col.insert_many_defaults(input_rows_count);
5050
                return Status::OK();
5051
            }
5052
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
5053
        }
5054
5055
43
        for (size_t i = 0; i < input_rows_count; ++i) {
5056
            if constexpr (!right_const) {
5057
                auto xpath_str = xpath_col.get_data_at(i);
5058
                if (xpath_str.empty()) {
5059
                    // should return null if xpath_str is empty
5060
                    res_col.insert_default();
5061
                    continue;
5062
                }
5063
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
5064
            }
5065
21
            if constexpr (!left_const) {
5066
21
                auto xml_str = xml_col.get_data_at(i);
5067
21
                if (xml_str.empty()) {
5068
                    // should return null if xml_str is empty
5069
1
                    res_col.insert_default();
5070
1
                    continue;
5071
1
                }
5072
20
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
5073
20
            }
5074
20
            std::string text;
5075
21
            try {
5076
21
                text = xpath_query.evaluate_string(xml_doc);
5077
21
            } catch (const pugi::xpath_exception& e) {
5078
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
5079
0
                                               e.what());
5080
0
            }
5081
20
            res_col.insert_data(text.data(), text.size());
5082
20
        }
5083
22
        return Status::OK();
5084
22
    }
_ZN5doris19FunctionXPathString14execute_vectorILb0ELb0EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
Line
Count
Source
5032
23
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
5033
23
        pugi::xml_document xml_doc;
5034
23
        pugi::xpath_query xpath_query;
5035
        // first check right_const, because we want to check empty input first
5036
        if constexpr (right_const) {
5037
            auto xpath_str = xpath_col.get_data_at(0);
5038
            if (xpath_str.empty()) {
5039
                // should return null if xpath_str is empty
5040
                res_col.insert_many_defaults(input_rows_count);
5041
                return Status::OK();
5042
            }
5043
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
5044
        }
5045
        if constexpr (left_const) {
5046
            auto xml_str = xml_col.get_data_at(0);
5047
            if (xml_str.empty()) {
5048
                // should return null if xml_str is empty
5049
                res_col.insert_many_defaults(input_rows_count);
5050
                return Status::OK();
5051
            }
5052
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
5053
        }
5054
5055
70
        for (size_t i = 0; i < input_rows_count; ++i) {
5056
47
            if constexpr (!right_const) {
5057
47
                auto xpath_str = xpath_col.get_data_at(i);
5058
47
                if (xpath_str.empty()) {
5059
                    // should return null if xpath_str is empty
5060
4
                    res_col.insert_default();
5061
4
                    continue;
5062
4
                }
5063
43
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
5064
43
            }
5065
47
            if constexpr (!left_const) {
5066
47
                auto xml_str = xml_col.get_data_at(i);
5067
47
                if (xml_str.empty()) {
5068
                    // should return null if xml_str is empty
5069
3
                    res_col.insert_default();
5070
3
                    continue;
5071
3
                }
5072
44
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
5073
44
            }
5074
44
            std::string text;
5075
47
            try {
5076
47
                text = xpath_query.evaluate_string(xml_doc);
5077
47
            } catch (const pugi::xpath_exception& e) {
5078
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
5079
0
                                               e.what());
5080
0
            }
5081
40
            res_col.insert_data(text.data(), text.size());
5082
40
        }
5083
23
        return Status::OK();
5084
23
    }
5085
};
5086
5087
class MakeSetImpl {
5088
public:
5089
    static constexpr auto name = "make_set";
5090
5091
0
    static size_t get_number_of_arguments() { return 0; }
5092
1
    static bool is_variadic() { return true; }
5093
0
    static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
5094
0
        if (arguments[0].get()->is_nullable()) {
5095
0
            return make_nullable(std::make_shared<DataTypeString>());
5096
0
        }
5097
0
        return std::make_shared<DataTypeString>();
5098
0
    }
5099
5100
    static bool is_return_nullable(bool has_nullable,
5101
0
                                   const std::vector<ColumnWithConstAndNullMap>& cols_info) {
5102
0
        return cols_info[0].null_map != nullptr;
5103
0
    }
5104
5105
    static bool execute_const_null(ColumnString::MutablePtr& res_col,
5106
                                   PaddedPODArray<UInt8>& res_null_map_data,
5107
0
                                   size_t input_rows_count, size_t null_index) {
5108
0
        if (null_index == 1) {
5109
0
            res_col->insert_many_defaults(input_rows_count);
5110
0
            res_null_map_data.assign(input_rows_count, (UInt8)1);
5111
0
            return true;
5112
0
        }
5113
0
        return false;
5114
0
    }
5115
5116
    static void execute(const std::vector<ColumnWithConstAndNullMap>& column_infos,
5117
                        ColumnString::MutablePtr& res_col, PaddedPODArray<UInt8>& res_null_map_data,
5118
0
                        size_t input_rows_count) {
5119
0
        static constexpr char SEPARATOR = ',';
5120
0
        const auto& bit_data =
5121
0
                assert_cast<const ColumnInt64&>(*column_infos[0].nested_col).get_data();
5122
0
        std::vector<const ColumnString*> str_cols(column_infos.size());
5123
0
        for (size_t i = 1; i < column_infos.size(); ++i) {
5124
0
            str_cols[i] = assert_cast<const ColumnString*>(column_infos[i].nested_col);
5125
0
        }
5126
5127
0
        for (size_t row = 0; row < input_rows_count; ++row) {
5128
0
            if (column_infos[0].is_null_at(row)) {
5129
0
                res_col->insert_default();
5130
0
                res_null_map_data[row] = 1;
5131
0
                continue;
5132
0
            }
5133
5134
0
            uint64_t bit = bit_data[column_infos[0].is_const ? 0 : row];
5135
0
            uint64_t col_pos = __builtin_ffsll(bit);
5136
0
            ColumnString::Chars data;
5137
0
            while (col_pos != 0 && col_pos < column_infos.size() && bit != 0) {
5138
0
                if (!column_infos[col_pos].is_null_at(row)) {
5139
                    /* Here insert `str,` directly to support the case below:
5140
                     * SELECT MAKE_SET(3, '', 'a');
5141
                     * the exception result should be ',a'.
5142
                     */
5143
0
                    auto s_ref = str_cols[col_pos]->get_data_at(
5144
0
                            column_infos[col_pos].is_const ? 0 : row);
5145
0
                    data.insert(s_ref.data, s_ref.data + s_ref.size);
5146
0
                    data.push_back(SEPARATOR);
5147
0
                }
5148
0
                bit &= ~(1ULL << (col_pos - 1));
5149
0
                col_pos = __builtin_ffsll(bit);
5150
0
            }
5151
            // remove the last ','
5152
0
            if (!data.empty()) {
5153
0
                data.pop_back();
5154
0
            }
5155
0
            res_col->insert_data(reinterpret_cast<const char*>(data.data()), data.size());
5156
0
        }
5157
0
    }
5158
};
5159
5160
class FunctionExportSet : public IFunction {
5161
public:
5162
    static constexpr auto name = "export_set";
5163
2
    static FunctionPtr create() { return std::make_shared<FunctionExportSet>(); }
5164
0
    String get_name() const override { return name; }
5165
0
    size_t get_number_of_arguments() const override { return 0; }
5166
1
    bool is_variadic() const override { return true; }
5167
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
5168
0
        return std::make_shared<DataTypeString>();
5169
0
    }
5170
5171
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
5172
0
                        uint32_t result, size_t input_rows_count) const override {
5173
0
        auto res_col = ColumnString::create();
5174
5175
0
        const size_t arg_size = arguments.size();
5176
0
        bool col_const[5];
5177
0
        ColumnPtr arg_cols[5];
5178
0
        bool all_const = true;
5179
0
        for (int i = 1; i < arg_size; ++i) {
5180
0
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
5181
0
            all_const = all_const && col_const[i];
5182
0
        }
5183
0
        std::tie(arg_cols[0], col_const[0]) =
5184
0
                unpack_if_const(block.get_by_position(arguments[0]).column);
5185
0
        if (arg_size == 3) {
5186
0
            default_preprocess_parameter_columns(arg_cols, col_const, {1, 2}, block, arguments);
5187
0
        } else if (arg_size == 4) {
5188
0
            default_preprocess_parameter_columns(arg_cols, col_const, {1, 2, 3}, block, arguments);
5189
0
        } else if (arg_size == 5) {
5190
0
            default_preprocess_parameter_columns(arg_cols, col_const, {1, 2, 3, 4}, block,
5191
0
                                                 arguments);
5192
0
        }
5193
5194
0
        const auto* bit_col = assert_cast<const ColumnInt128*>(arg_cols[0].get());
5195
0
        const auto* on_col = assert_cast<const ColumnString*>(arg_cols[1].get());
5196
0
        const auto* off_col = assert_cast<const ColumnString*>(arg_cols[2].get());
5197
0
        const ColumnString* sep_col = nullptr;
5198
0
        const ColumnInt32* num_bits_col = nullptr;
5199
0
        if (arg_size > 3) {
5200
0
            sep_col = assert_cast<const ColumnString*>(arg_cols[3].get());
5201
0
            if (arg_size == 5) {
5202
0
                num_bits_col = assert_cast<const ColumnInt32*>(arg_cols[4].get());
5203
0
            }
5204
0
        }
5205
5206
0
        for (size_t i = 0; i < input_rows_count; ++i) {
5207
0
            uint64_t bit =
5208
0
                    check_and_get_bit(bit_col->get_element(index_check_const(i, col_const[0])));
5209
5210
0
            size_t idx_for_args = all_const ? 0 : i;
5211
0
            StringRef on = on_col->get_data_at(idx_for_args);
5212
0
            StringRef off = off_col->get_data_at(idx_for_args);
5213
0
            StringRef separator(",", 1);
5214
0
            int8_t num_of_bits = 64;
5215
5216
0
            if (arg_size > 3) {
5217
0
                separator = sep_col->get_data_at(idx_for_args);
5218
0
                if (arg_size == 5) {
5219
0
                    num_of_bits =
5220
0
                            check_and_get_num_of_bits(num_bits_col->get_element(idx_for_args));
5221
0
                }
5222
0
            }
5223
5224
0
            execute_single(bit, on, off, separator, num_of_bits, *res_col);
5225
0
        }
5226
0
        block.replace_by_position(result, std::move(res_col));
5227
0
        return Status::OK();
5228
0
    }
5229
5230
private:
5231
    /* The valid range of the input `bit` parameter should be [-2^63, 2^64 - 1]
5232
     * If it exceeds this range, the MAX/MIN values of the signed 64-bit integer are used for calculation
5233
     * This behavior is consistent with MySQL.
5234
     */
5235
0
    uint64_t check_and_get_bit(__int128 col_bit_val) const {
5236
0
        if (col_bit_val > ULLONG_MAX) {
5237
0
            return LLONG_MAX;
5238
0
        } else if (col_bit_val < LLONG_MIN) {
5239
0
            return LLONG_MIN;
5240
0
        }
5241
0
        return static_cast<uint64_t>(col_bit_val);
5242
0
    }
5243
5244
    // If the input value is not in the range [0, 64], return default value 64
5245
0
    int8_t check_and_get_num_of_bits(int32_t col_num_of_bits_val) const {
5246
0
        if (col_num_of_bits_val >= 0 && col_num_of_bits_val <= 64) {
5247
0
            return static_cast<int8_t>(col_num_of_bits_val);
5248
0
        }
5249
0
        return 64;
5250
0
    }
5251
5252
    void execute_single(uint64_t bit, const StringRef& on, const StringRef& off,
5253
                        const StringRef& separator, int8_t num_of_bits,
5254
0
                        ColumnString& res_col) const {
5255
0
        ColumnString::Chars data;
5256
0
        data.reserve(std::max(on.size, off.size) * num_of_bits +
5257
0
                     separator.size * (num_of_bits - 1));
5258
5259
0
        while (bit && num_of_bits) {
5260
0
            if (bit & 1) {
5261
0
                data.insert(on.data, on.data + on.size);
5262
0
            } else {
5263
0
                data.insert(off.data, off.data + off.size);
5264
0
            }
5265
0
            bit >>= 1;
5266
0
            if (--num_of_bits) {
5267
0
                data.insert(separator.data, separator.data + separator.size);
5268
0
            }
5269
0
        }
5270
5271
0
        if (num_of_bits > 0) {
5272
0
            ColumnString::Chars off_sep_combo;
5273
0
            off_sep_combo.reserve(separator.size + off.size);
5274
0
            off_sep_combo.insert(off_sep_combo.end(), off.data, off.data + off.size);
5275
0
            off_sep_combo.insert(off_sep_combo.end(), separator.data,
5276
0
                                 separator.data + separator.size);
5277
5278
0
            for (size_t i = 0; i < num_of_bits; ++i) {
5279
0
                data.insert(off_sep_combo.data(), off_sep_combo.data() + off_sep_combo.size());
5280
0
            }
5281
0
            data.erase(data.end() - separator.size, data.end());
5282
0
        }
5283
5284
0
        res_col.insert_data(reinterpret_cast<const char*>(data.data()), data.size());
5285
0
    }
5286
};
5287
5288
// ATTN: for debug only
5289
// compute crc32 hash value as the same way in `VOlapTablePartitionParam::find_tablets()`
5290
class FunctionCrc32Internal : public IFunction {
5291
public:
5292
    static constexpr auto name = "crc32_internal";
5293
2
    static FunctionPtr create() { return std::make_shared<FunctionCrc32Internal>(); }
5294
0
    String get_name() const override { return name; }
5295
0
    size_t get_number_of_arguments() const override { return 0; }
5296
1
    bool is_variadic() const override { return true; }
5297
0
    bool use_default_implementation_for_nulls() const override { return false; }
5298
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
5299
0
        return std::make_shared<DataTypeInt64>();
5300
0
    }
5301
5302
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
5303
0
                        uint32_t result, size_t input_rows_count) const override {
5304
0
        DCHECK_GE(arguments.size(), 1);
5305
5306
0
        auto argument_size = arguments.size();
5307
0
        std::vector<ColumnPtr> argument_columns(argument_size);
5308
0
        std::vector<PrimitiveType> argument_primitive_types(argument_size);
5309
5310
0
        for (size_t i = 0; i < argument_size; ++i) {
5311
0
            argument_columns[i] =
5312
0
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
5313
0
            argument_primitive_types[i] =
5314
0
                    block.get_by_position(arguments[i]).type->get_primitive_type();
5315
0
        }
5316
5317
0
        auto res_col = ColumnInt64::create();
5318
0
        auto& res_data = res_col->get_data();
5319
0
        res_data.resize_fill(input_rows_count, 0);
5320
5321
0
        for (size_t i = 0; i < input_rows_count; ++i) {
5322
0
            uint32_t hash_val = 0;
5323
0
            for (size_t j = 0; j < argument_size; ++j) {
5324
0
                const auto& column = argument_columns[j];
5325
0
                auto primitive_type = argument_primitive_types[j];
5326
0
                auto val = column->get_data_at(i);
5327
0
                if (val.data != nullptr) {
5328
0
                    hash_val = RawValue::zlib_crc32(val.data, val.size, primitive_type, hash_val);
5329
0
                } else {
5330
0
                    hash_val = HashUtil::zlib_crc_hash_null(hash_val);
5331
0
                }
5332
0
            }
5333
0
            res_data[i] = hash_val;
5334
0
        }
5335
5336
0
        block.replace_by_position(result, std::move(res_col));
5337
0
        return Status::OK();
5338
0
    }
5339
};
5340
5341
class FunctionUnicodeNormalize : public IFunction {
5342
public:
5343
    static constexpr auto name = "unicode_normalize";
5344
5345
9
    static FunctionPtr create() { return std::make_shared<FunctionUnicodeNormalize>(); }
5346
5347
3
    String get_name() const override { return name; }
5348
5349
7
    size_t get_number_of_arguments() const override { return 2; }
5350
5351
7
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
5352
7
        if (arguments.size() != 2 || !is_string_type(arguments[0]->get_primitive_type()) ||
5353
7
            !is_string_type(arguments[1]->get_primitive_type())) {
5354
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
5355
0
                                   "Illegal type {} and {} of arguments of function {}",
5356
0
                                   arguments[0]->get_name(), arguments[1]->get_name(), get_name());
5357
0
        }
5358
7
        return arguments[0];
5359
7
    }
5360
5361
10
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
5362
5363
12
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
5364
12
        if (scope == FunctionContext::THREAD_LOCAL) {
5365
5
            return Status::OK();
5366
5
        }
5367
5368
7
        if (!context->is_col_constant(1)) {
5369
1
            return Status::InvalidArgument(
5370
1
                    "The second argument 'mode' of function {} must be constant", get_name());
5371
1
        }
5372
5373
6
        auto* const_col = context->get_constant_col(1);
5374
6
        auto mode_ref = const_col->column_ptr->get_data_at(0);
5375
6
        std::string lower_mode = doris::to_lower(std::string(doris::trim(mode_ref.to_string())));
5376
5377
6
        UErrorCode status = U_ZERO_ERROR;
5378
6
        const icu::Normalizer2* normalizer = nullptr;
5379
5380
6
        if (lower_mode == "nfc") {
5381
2
            normalizer = icu::Normalizer2::getInstance(nullptr, "nfc", UNORM2_COMPOSE, status);
5382
4
        } else if (lower_mode == "nfd") {
5383
1
            normalizer = icu::Normalizer2::getNFDInstance(status);
5384
3
        } else if (lower_mode == "nfkc") {
5385
0
            normalizer = icu::Normalizer2::getInstance(nullptr, "nfkc", UNORM2_COMPOSE, status);
5386
3
        } else if (lower_mode == "nfkd") {
5387
1
            normalizer = icu::Normalizer2::getNFKDInstance(status);
5388
2
        } else if (lower_mode == "nfkc_cf") {
5389
1
            normalizer = icu::Normalizer2::getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, status);
5390
1
        } else {
5391
1
            return Status::InvalidArgument(
5392
1
                    "Invalid normalization mode '{}' for function {}. "
5393
1
                    "Supported modes: NFC, NFD, NFKC, NFKD, NFKC_CF",
5394
1
                    lower_mode, get_name());
5395
1
        }
5396
5397
5
        if (U_FAILURE(status) || normalizer == nullptr) {
5398
0
            return Status::InvalidArgument(
5399
0
                    "Failed to get normalizer instance for mode '{}' in function {}: {}",
5400
0
                    lower_mode, get_name(), u_errorName(status));
5401
0
        }
5402
5403
5
        auto state = std::make_shared<UnicodeNormalizeState>();
5404
5
        state->normalizer = normalizer;
5405
5
        context->set_function_state(scope, state);
5406
5
        return Status::OK();
5407
5
    }
5408
5409
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
5410
5
                        uint32_t result, size_t input_rows_count) const override {
5411
5
        auto* state = reinterpret_cast<UnicodeNormalizeState*>(
5412
5
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
5413
5
        if (state == nullptr || state->normalizer == nullptr) {
5414
0
            return Status::RuntimeError("unicode_normalize function state is not initialized");
5415
0
        }
5416
5417
5
        ColumnPtr col =
5418
5
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
5419
5
        const auto* col_str = check_and_get_column<ColumnString>(col.get());
5420
5
        if (col_str == nullptr) {
5421
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
5422
0
                                        block.get_by_position(arguments[0]).column->get_name(),
5423
0
                                        get_name());
5424
0
        }
5425
5426
5
        const auto& data = col_str->get_chars();
5427
5
        const auto& offsets = col_str->get_offsets();
5428
5429
5
        auto res = ColumnString::create();
5430
5
        auto& res_data = res->get_chars();
5431
5
        auto& res_offsets = res->get_offsets();
5432
5433
5
        size_t rows = offsets.size();
5434
5
        res_offsets.resize(rows);
5435
5436
5
        std::string tmp;
5437
10
        for (size_t i = 0; i < rows; ++i) {
5438
5
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
5439
5
            size_t len = offsets[i] - offsets[i - 1];
5440
5441
5
            normalize_one(state->normalizer, begin, len, tmp);
5442
5
            StringOP::push_value_string(tmp, i, res_data, res_offsets);
5443
5
        }
5444
5445
5
        block.replace_by_position(result, std::move(res));
5446
5
        return Status::OK();
5447
5
    }
5448
5449
private:
5450
    struct UnicodeNormalizeState {
5451
        const icu::Normalizer2* normalizer = nullptr;
5452
    };
5453
5454
    static void normalize_one(const icu::Normalizer2* normalizer, const char* input, size_t length,
5455
5
                              std::string& output) {
5456
5
        if (length == 0) {
5457
0
            output.clear();
5458
0
            return;
5459
0
        }
5460
5461
5
        icu::StringPiece sp(input, static_cast<int32_t>(length));
5462
5
        icu::UnicodeString src16 = icu::UnicodeString::fromUTF8(sp);
5463
5464
5
        UErrorCode status = U_ZERO_ERROR;
5465
5
        UNormalizationCheckResult quick = normalizer->quickCheck(src16, status);
5466
5
        if (U_SUCCESS(status) && quick == UNORM_YES) {
5467
2
            output.assign(input, length);
5468
2
            return;
5469
2
        }
5470
5471
3
        icu::UnicodeString result16;
5472
3
        status = U_ZERO_ERROR;
5473
3
        normalizer->normalize(src16, result16, status);
5474
3
        if (U_FAILURE(status)) {
5475
0
            output.assign(input, length);
5476
0
            return;
5477
0
        }
5478
5479
3
        output.clear();
5480
3
        result16.toUTF8String(output);
5481
3
    }
5482
};
5483
5484
#include "common/compile_check_avoid_end.h"
5485
} // namespace doris