Coverage Report

Created: 2026-03-16 08:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
#include <sys/types.h>
22
23
#include <algorithm>
24
#include <array>
25
#include <boost/iterator/iterator_facade.hpp>
26
#include <boost/locale.hpp>
27
#include <climits>
28
#include <cmath>
29
#include <cstddef>
30
#include <cstdlib>
31
#include <cstring>
32
#include <iomanip>
33
#include <memory>
34
#include <ostream>
35
#include <random>
36
#include <sstream>
37
#include <tuple>
38
#include <type_traits>
39
#include <unordered_map>
40
#include <utility>
41
#include <variant>
42
#include <vector>
43
44
#include "common/compiler_util.h" // IWYU pragma: keep
45
#include "common/exception.h"
46
#include "common/status.h"
47
#include "core/block/block.h"
48
#include "core/block/column_numbers.h"
49
#include "core/block/column_with_type_and_name.h"
50
#include "core/column/column.h"
51
#include "core/column/column_const.h"
52
#include "core/column/column_varbinary.h"
53
#include "core/column/column_vector.h"
54
#include "core/data_type/data_type.h"
55
#include "core/data_type/define_primitive_type.h"
56
#include "core/data_type/primitive_type.h"
57
#include "core/memcmp_small.h"
58
#include "core/memcpy_small.h"
59
#include "core/pod_array.h"
60
#include "core/pod_array_fwd.h"
61
#include "core/types.h"
62
#include "core/value/decimalv2_value.h"
63
#include "exec/common/hash_table/phmap_fwd_decl.h"
64
#include "exec/common/int_exp.h"
65
#include "exec/common/template_helpers.hpp"
66
#include "exprs/aggregate/aggregate_function.h"
67
#include "exprs/function/function_needs_to_handle_null.h"
68
#include "util/raw_value.h"
69
#include "util/sha.h"
70
#include "util/string_search.hpp"
71
#include "util/string_util.h"
72
#include "util/utf8_check.h"
73
74
#ifndef USE_LIBCPP
75
#include <memory_resource>
76
#define PMR std::pmr
77
#else
78
#include <boost/container/pmr/monotonic_buffer_resource.hpp>
79
#include <boost/container/pmr/vector.hpp>
80
#define PMR boost::container::pmr
81
#endif
82
83
#include <fmt/format.h>
84
#include <unicode/normalizer2.h>
85
#include <unicode/stringpiece.h>
86
#include <unicode/unistr.h>
87
88
#include <cstdint>
89
#include <string>
90
#include <string_view>
91
92
#include "core/assert_cast.h"
93
#include "core/column/column_array.h"
94
#include "core/column/column_decimal.h"
95
#include "core/column/column_nullable.h"
96
#include "core/column/column_string.h"
97
#include "core/data_type/data_type_array.h"
98
#include "core/data_type/data_type_decimal.h"
99
#include "core/data_type/data_type_nullable.h"
100
#include "core/data_type/data_type_number.h"
101
#include "core/data_type/data_type_string.h"
102
#include "core/string_ref.h"
103
#include "exec/common/pinyin.h"
104
#include "exec/common/stringop_substring.h"
105
#include "exec/common/util.hpp"
106
#include "exprs/function/function.h"
107
#include "exprs/function/function_helpers.h"
108
#include "exprs/function_context.h"
109
#include "exprs/math_functions.h"
110
#include "pugixml.hpp"
111
#include "util/md5.h"
112
#include "util/simd/vstring_function.h"
113
#include "util/sm3.h"
114
#include "util/url_coding.h"
115
#include "util/url_parser.h"
116
117
namespace doris {
118
#include "common/compile_check_avoid_begin.h"
119
class FunctionStrcmp : public IFunction {
120
public:
121
    static constexpr auto name = "strcmp";
122
123
9
    static FunctionPtr create() { return std::make_shared<FunctionStrcmp>(); }
124
125
1
    String get_name() const override { return name; }
126
127
0
    size_t get_number_of_arguments() const override { return 2; }
128
129
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
130
0
        return std::make_shared<DataTypeInt8>();
131
0
    }
132
133
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
134
0
                        uint32_t result, size_t input_rows_count) const override {
135
0
        const auto& [arg0_column, arg0_const] =
136
0
                unpack_if_const(block.get_by_position(arguments[0]).column);
137
0
        const auto& [arg1_column, arg1_const] =
138
0
                unpack_if_const(block.get_by_position(arguments[1]).column);
139
140
0
        auto result_column = ColumnInt8::create(input_rows_count);
141
142
0
        if (auto arg0 = check_and_get_column<ColumnString>(arg0_column.get())) {
143
0
            if (auto arg1 = check_and_get_column<ColumnString>(arg1_column.get())) {
144
0
                if (arg0_const) {
145
0
                    scalar_vector(arg0->get_data_at(0), *arg1, *result_column);
146
0
                } else if (arg1_const) {
147
0
                    vector_scalar(*arg0, arg1->get_data_at(0), *result_column);
148
0
                } else {
149
0
                    vector_vector(*arg0, *arg1, *result_column);
150
0
                }
151
0
            }
152
0
        }
153
154
0
        block.replace_by_position(result, std::move(result_column));
155
0
        return Status::OK();
156
0
    }
157
158
private:
159
0
    static void scalar_vector(const StringRef str, const ColumnString& vec1, ColumnInt8& res) {
160
0
        size_t size = vec1.size();
161
0
        for (size_t i = 0; i < size; ++i) {
162
0
            res.get_data()[i] = str.compare(vec1.get_data_at(i));
163
0
        }
164
0
    }
165
166
0
    static void vector_scalar(const ColumnString& vec0, const StringRef str, ColumnInt8& res) {
167
0
        size_t size = vec0.size();
168
0
        for (size_t i = 0; i < size; ++i) {
169
0
            res.get_data()[i] = vec0.get_data_at(i).compare(str);
170
0
        }
171
0
    }
172
173
0
    static void vector_vector(const ColumnString& vec0, const ColumnString& vec1, ColumnInt8& res) {
174
0
        size_t size = vec0.size();
175
0
        for (size_t i = 0; i < size; ++i) {
176
0
            res.get_data()[i] = vec0.get_data_at(i).compare(vec1.get_data_at(i));
177
0
        }
178
0
    }
179
};
180
181
/// SQL function `auto_partition_name(type, ...)`: builds a partition name from partition values.
///
/// The first argument selects the naming scheme (matched by prefix on the first row's value):
///   * `list...`  -> every following argument is a partition value; see
///                   `_auto_partition_type_of_list`.
///   * otherwise  -> range naming; argument 1 is the unit
///                   (year|month|day|hour|minute|second) and argument 2 is the DATE/DATETIME
///                   text; see `_auto_partition_type_of_range`.
///
/// NULL handling is done here (`use_default_implementation_for_nulls()` is false): the list
/// scheme encodes a NULL value as 'X'.
class FunctionAutoPartitionName : public IFunction {
public:
    static constexpr auto name = "auto_partition_name";
    static FunctionPtr create() { return std::make_shared<FunctionAutoPartitionName>(); }
    String get_name() const override { return name; }
    size_t get_number_of_arguments() const override { return 0; }
    bool is_variadic() const override { return true; }
    bool use_default_implementation_for_nulls() const override { return false; }
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return std::make_shared<DataTypeString>();
    }

    /// Materialises all arguments as string columns and dispatches to the list or range scheme.
    ///
    /// @return `Status::InvalidArgument` when no argument is given, when the range scheme has
    ///         fewer than three arguments / an unknown unit, or when a range value is not a
    ///         DATE|DATETIME literal; `Status::OK()` otherwise.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        const size_t argument_size = arguments.size();
        if (argument_size == 0) {
            // The partition type is read from argument 0 below.
            return Status::InvalidArgument(
                    fmt::format("function {} requires at least one argument", get_name()));
        }

        // All-zero null map shared by every argument that is not nullable.
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
        std::vector<const ColumnString::Chars*> chars_list(argument_size);
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
        std::vector<bool> is_const_args(argument_size);
        std::vector<const ColumnUInt8::Container*> null_list(argument_size);
        // Holds the null-map columns so the pointers stored in `null_list` stay valid after
        // `argument_columns[i]` is re-pointed at the nested column.
        std::vector<ColumnPtr> argument_null_columns(argument_size);
        std::vector<ColumnPtr> argument_columns(argument_size);

        for (size_t i = 0; i < argument_size; ++i) {
            const auto& source_column = block.get_by_position(arguments[i]).column;
            argument_columns[i] = source_column->convert_to_full_column_if_const();
            if (const auto* nullable =
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
                null_list[i] = &nullable->get_null_map_data();
                argument_null_columns[i] = nullable->get_null_map_column_ptr();
                argument_columns[i] = nullable->get_nested_column_ptr();
            } else {
                null_list[i] = &const_null_map->get_data();
            }

            const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
            chars_list[i] = &col_str->get_chars();
            offsets_list[i] = &col_str->get_offsets();
            // Const-ness is taken from the original (not materialised) argument column.
            is_const_args[i] = std::get<1>(unpack_if_const(source_column));
        }

        auto res = ColumnString::create();
        auto& res_data = res->get_chars();
        auto& res_offset = res->get_offsets();
        res_offset.resize(input_rows_count);

        if (input_rows_count == 0) {
            // Nothing to name, and there is no first row to read the partition type from.
            block.get_by_position(result).column = std::move(res);
            return Status::OK();
        }

        // partition type is list|range
        const std::string_view partition_type = _first_value(*chars_list[0], *offsets_list[0]);
        if (_has_prefix(partition_type, "list")) {
            return _auto_partition_type_of_list(chars_list, offsets_list, is_const_args, null_list,
                                                res_data, res_offset, input_rows_count,
                                                argument_size, block, result, res);
        }
        return _auto_partition_type_of_range(chars_list, offsets_list, is_const_args, res_data,
                                             res_offset, input_rows_count, argument_size, block,
                                             result, res);
    }

private:
    /// Length-bounded view of row 0 of a string column. Requires at least one row.
    static std::string_view _first_value(const ColumnString::Chars& chars,
                                         const ColumnString::Offsets& offsets) {
        return {chars.raw_data(), static_cast<size_t>(offsets[0])};
    }

    /// True when `value` begins with `prefix`; never reads past `value`.
    static bool _has_prefix(std::string_view value, std::string_view prefix) {
        return value.substr(0, prefix.size()) == prefix;
    }

    /// UTF-8 -> UTF-16 conversion via boost::locale.
    std::u16string _string_to_u16string(const std::string& str) const {
        return boost::locale::conv::utf_to_utf<char16_t>(str);
    }

    /// Encodes a UTF-16 string for use inside a partition name: ASCII letters and digits are
    /// copied, every other code unit is replaced by the lowercase hex of its code point
    /// (at least two digits). A leading '-' additionally gets a '_' prepended.
    std::string _string_to_unicode(const std::u16string& s) const {
        std::string res_s;
        res_s.reserve(s.size());
        if (s.length() > 0 && s[0] == '-') {
            res_s += '_';
        }
        for (size_t i = 0; i < s.length(); i++) {
            const char16_t ch = s[i];
            const bool is_alnum = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') ||
                                  (ch >= '0' && ch <= '9');
            if (is_alnum) {
                res_s += ch;
            } else {
                // NOTE(review): for a surrogate pair the combined code point is emitted here
                // and the low surrogate is emitted again on the next iteration. Left as is
                // because changing it would rename existing partitions - confirm intent.
                const int code_point = _get_code_point_at(s, i);
                res_s += fmt::format("{:02x}", static_cast<uint32_t>(code_point));
            }
        }
        return res_s;
    }

    /// Returns the code point starting at `index`: a valid high/low surrogate pair is combined,
    /// anything else yields the single code unit.
    int _get_code_point_at(const std::u16string& str, std::size_t index) const {
        const char16_t first = str[index];
        // [0xD800,0xDBFF] is the scope of the first code unit
        const bool is_high_surrogate = first >= 0xD800 && first <= 0xDBFF;
        if (is_high_surrogate && (index + 1 < str.size())) {
            const char16_t second = str[index + 1];
            // [0xDC00,0xDFFF] is the scope of the second code unit
            if (second >= 0xDC00 && second <= 0xDFFF) {
                return ((first - 0xD800) << 10) + (second - 0xDC00) + 0x10000;
            }
        }

        return first;
    }

    /// List scheme: name = 'p' + for each value (arguments 1..n) either 'X' for NULL or
    /// `<encoded value><number of UTF-16 code units>`. Names longer than 50 characters are
    /// cut to 50 characters and suffixed with `_<hash of the full name>`.
    Status _auto_partition_type_of_list(std::vector<const ColumnString::Chars*>& chars_list,
                                        std::vector<const ColumnString::Offsets*>& offsets_list,
                                        std::vector<bool>& is_const_args,
                                        const std::vector<const ColumnUInt8::Container*>& null_list,
                                        auto& res_data, auto& res_offset, size_t input_rows_count,
                                        size_t argument_size, Block& block, uint32_t result,
                                        auto& res) const {
        int curr_len = 0;
        for (int row = 0; row < input_rows_count; row++) {
            std::string res_p;
            res_p.reserve(argument_size * 5);
            res_p += 'p';
            for (int col = 1; col < argument_size; col++) {
                const auto& current_offsets = *offsets_list[col];
                const auto& current_chars = *chars_list[col];
                const auto& current_nullmap = *null_list[col];

                if (current_nullmap[row]) {
                    res_p += 'X';
                    continue;
                }

                auto idx = index_check_const(row, is_const_args[col]);
                int size = current_offsets[idx] - current_offsets[idx - 1];
                const char* raw_chars =
                        reinterpret_cast<const char*>(&current_chars[current_offsets[idx - 1]]);
                // convert string to u16string in order to convert to unicode strings
                const std::string raw_str(raw_chars, size);
                auto u16string = _string_to_u16string(raw_str);
                res_p += _string_to_unicode(u16string) + std::to_string(u16string.size());
            }

            // check the name of length
            int len = res_p.size();
            if (len > 50) {
                // NOTE(review): to_hash_code() returns a signed value; a negative hash is
                // rendered with a leading '-' by "{:08x}". Kept unchanged because altering it
                // would rename existing partitions - confirm against the name generator used
                // elsewhere.
                res_p = std::format("{}_{:08x}", res_p.substr(0, 50), to_hash_code(res_p));
                len = res_p.size();
            }
            curr_len += len;
            res_data.resize(curr_len);
            memcpy(&res_data[res_offset[row - 1]], res_p.c_str(), len);
            res_offset[row] = res_offset[row - 1] + len;
        }
        block.get_by_position(result).column = std::move(res);
        return Status::OK();
    }

    /// Appends the first `len` entries of `date_str` after the leading 'p' of row `row`.
    /// @return number of bytes occupied so far in the row, including the leading 'p'.
    size_t _copy_date_str_of_len_to_res_data(auto& res_data, auto& res_offset,
                                             std::vector<std::string>& date_str, size_t row,
                                             size_t len) const {
        size_t curr_len = 1;
        for (size_t j = 0; j < len; j++) {
            memcpy(&res_data[res_offset[row - 1]] + curr_len, date_str[j].c_str(),
                   date_str[j].size());
            curr_len += date_str[j].size();
        }
        return curr_len;
    }

    /// Range scheme: every row becomes a 15 character name `p<yyyymmddhhmmss>` truncated to
    /// the unit given by argument 1 (read from the first row, matched by prefix).
    ///
    /// @return `Status::InvalidArgument` for fewer than three arguments, an unknown unit, or a
    ///         value that is not `yyyy-mm-dd` / `yyyy-mm-dd hh:mm:ss`.
    Status _auto_partition_type_of_range(std::vector<const ColumnString::Chars*>& chars_list,
                                         std::vector<const ColumnString::Offsets*>& offsets_list,
                                         std::vector<bool>& is_const_args, auto& res_data,
                                         auto& res_offset, size_t input_rows_count,
                                         size_t argument_size, Block& block, uint32_t result,
                                         auto& res) const {
        if (argument_size < 3) {
            // Arguments 1 (unit) and 2 (value) are indexed unconditionally below.
            return Status::InvalidArgument(
                    "The range partition requires a partition unit and a DATE|DATETIME value");
        }

        res_data.resize(15 * input_rows_count);
        if (input_rows_count == 0) {
            block.get_by_position(result).column = std::move(res);
            return Status::OK();
        }

        // raw => 2022-12-12 11:30:20
        // year => 2022 01 01 00 00 00
        // month => 2022 12 01 00 00 00
        // day => 2022 12 12 00 00 00
        // hour => 2022 12 12 11 00 00
        // minute => 2022 12 12 11 30 00
        // second => 2022 12 12 11 30 20
        //
        // The unit is the same for all rows, so resolve it once: how many date fields are
        // kept, and which literal follows them before zero padding.
        const std::string_view range_type = _first_value(*chars_list[1], *offsets_list[1]);
        size_t kept_fields = 0;
        std::string_view tail;
        if (_has_prefix(range_type, "year")) {
            kept_fields = 1;
            tail = "0101";
        } else if (_has_prefix(range_type, "month")) {
            kept_fields = 2;
            tail = "01";
        } else if (_has_prefix(range_type, "day")) {
            kept_fields = 3;
        } else if (_has_prefix(range_type, "hour")) {
            kept_fields = 4;
        } else if (_has_prefix(range_type, "minute")) {
            kept_fields = 5;
        } else if (_has_prefix(range_type, "second")) {
            kept_fields = 6;
        } else {
            // Previously an unknown unit produced a name of fifteen '0' characters because
            // the zero fill overwrote the leading 'p'.
            return Status::InvalidArgument(
                    "The range partition only support unit year|month|day|hour|minute|second");
        }

        // check the str if it is date|datetime; compiled once instead of once per row
        const RE2 date_regex(R"(^\d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d{2})?$)");

        const auto& current_offsets = *offsets_list[2];
        const auto& current_chars = *chars_list[2];
        for (int i = 0; i < input_rows_count; i++) {
            auto idx = index_check_const(i, is_const_args[2]);
            int size = current_offsets[idx] - current_offsets[idx - 1];
            const char* tmp =
                    reinterpret_cast<const char*>(&current_chars[current_offsets[idx - 1]]);
            std::string to_split_s(tmp, size);

            if (!RE2::FullMatch(to_split_s, date_regex)) {
                return Status::InvalidArgument("The range partition only support DATE|DATETIME");
            }

            // split date_str from (yyyy-mm-dd hh:mm:ss) to ([yyyy, mm, dd, hh, mm, ss]);
            // a DATE leaves the three time fields empty.
            std::vector<std::string> date_str(6);
            date_str[0] = to_split_s.substr(0, 4);
            for (int ni = 5, j = 1; ni <= size; ni += 3, j++) {
                date_str[j] = to_split_s.substr(ni, 2);
            }

            res_data[res_offset[i - 1]] = 'p';
            int curr_len = 0;
            curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i,
                                                          kept_fields);
            if (!tail.empty()) {
                memcpy(&res_data[res_offset[i - 1]] + curr_len, tail.data(), tail.size());
                curr_len += tail.size();
            }

            // fill in zero
            int zero = 15 - curr_len;
            std::fill_n(&res_data[res_offset[i - 1]] + curr_len, zero, '0');
            curr_len += zero;
            res_offset[i] = res_offset[i - 1] + curr_len;
        }
        block.get_by_position(result).column = std::move(res);
        return Status::OK();
    }

    /// 32-bit polynomial hash (multiplier 31) over the bytes of `str`, returned as int32_t.
    int32_t to_hash_code(const std::string& str) const {
        uint64_t h = 0;
        for (uint8_t c : str) {
            h = (h * 31U + c) & 0xFFFFFFFFU;
        }
        return static_cast<int32_t>(h);
    }
};
418
419
template <typename Impl>
420
class FunctionSubstring : public IFunction {
421
public:
422
    static constexpr auto name = SubstringUtil::name;
423
2
    String get_name() const override { return name; }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE8get_nameB5cxx11Ev
Line
Count
Source
423
1
    String get_name() const override { return name; }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE8get_nameB5cxx11Ev
Line
Count
Source
423
1
    String get_name() const override { return name; }
424
29.7k
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
_ZN5doris17FunctionSubstringINS_11Substr3ImplEE6createEv
Line
Count
Source
424
29.6k
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
_ZN5doris17FunctionSubstringINS_11Substr2ImplEE6createEv
Line
Count
Source
424
94
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
425
426
29.7k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
427
29.7k
        return std::make_shared<DataTypeString>();
428
29.7k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
426
29.6k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
427
29.6k
        return std::make_shared<DataTypeString>();
428
29.6k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
426
85
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
427
85
        return std::make_shared<DataTypeString>();
428
85
    }
429
29.7k
    DataTypes get_variadic_argument_types_impl() const override {
430
29.7k
        return Impl::get_variadic_argument_types();
431
29.7k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
429
29.6k
    DataTypes get_variadic_argument_types_impl() const override {
430
29.6k
        return Impl::get_variadic_argument_types();
431
29.6k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
429
93
    DataTypes get_variadic_argument_types_impl() const override {
430
93
        return Impl::get_variadic_argument_types();
431
93
    }
432
29.7k
    size_t get_number_of_arguments() const override {
433
29.7k
        return get_variadic_argument_types_impl().size();
434
29.7k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE23get_number_of_argumentsEv
Line
Count
Source
432
29.6k
    size_t get_number_of_arguments() const override {
433
29.6k
        return get_variadic_argument_types_impl().size();
434
29.6k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE23get_number_of_argumentsEv
Line
Count
Source
432
85
    size_t get_number_of_arguments() const override {
433
85
        return get_variadic_argument_types_impl().size();
434
85
    }
435
436
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
437
29.3k
                        uint32_t result, size_t input_rows_count) const override {
438
29.3k
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
439
29.3k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
437
29.3k
                        uint32_t result, size_t input_rows_count) const override {
438
29.3k
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
439
29.3k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
437
55
                        uint32_t result, size_t input_rows_count) const override {
438
55
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
439
55
    }
440
};
441
442
struct Substr3Impl {
443
29.6k
    static DataTypes get_variadic_argument_types() {
444
29.6k
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>(),
445
29.6k
                std::make_shared<DataTypeInt32>()};
446
29.6k
    }
447
448
    static Status execute_impl(FunctionContext* context, Block& block,
449
                               const ColumnNumbers& arguments, uint32_t result,
450
29.3k
                               size_t input_rows_count) {
451
29.3k
        SubstringUtil::substring_execute(block, arguments, result, input_rows_count);
452
29.3k
        return Status::OK();
453
29.3k
    }
454
};
455
456
struct Substr2Impl {
457
93
    static DataTypes get_variadic_argument_types() {
458
93
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>()};
459
93
    }
460
461
    static Status execute_impl(FunctionContext* context, Block& block,
462
                               const ColumnNumbers& arguments, uint32_t result,
463
55
                               size_t input_rows_count) {
464
55
        auto col_len = ColumnInt32::create(input_rows_count);
465
55
        auto& strlen_data = col_len->get_data();
466
467
55
        ColumnPtr str_col;
468
55
        bool str_const;
469
55
        std::tie(str_col, str_const) = unpack_if_const(block.get_by_position(arguments[0]).column);
470
471
55
        const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
472
473
55
        if (str_const) {
474
18
            std::fill(strlen_data.begin(), strlen_data.end(), str_offset[0] - str_offset[-1]);
475
37
        } else {
476
101
            for (int i = 0; i < input_rows_count; ++i) {
477
64
                strlen_data[i] = str_offset[i] - str_offset[i - 1];
478
64
            }
479
37
        }
480
481
        // we complete the column2(strlen) with the default value - each row's strlen.
482
55
        block.insert({std::move(col_len), std::make_shared<DataTypeInt32>(), "strlen"});
483
55
        ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1};
484
485
55
        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
486
55
        return Status::OK();
487
55
    }
488
};
489
490
template <bool Reverse>
491
class FunctionMaskPartial;
492
493
class FunctionMask : public IFunction {
494
public:
495
    static constexpr auto name = "mask";
496
    static constexpr unsigned char DEFAULT_UPPER_MASK = 'X';
497
    static constexpr unsigned char DEFAULT_LOWER_MASK = 'x';
498
    static constexpr unsigned char DEFAULT_NUMBER_MASK = 'n';
499
9
    String get_name() const override { return name; }
500
58
    static FunctionPtr create() { return std::make_shared<FunctionMask>(); }
501
502
49
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
503
49
        return std::make_shared<DataTypeString>();
504
49
    }
505
506
0
    size_t get_number_of_arguments() const override { return 0; }
507
508
147
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1, 2, 3}; }
509
510
50
    bool is_variadic() const override { return true; }
511
512
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
513
96
                        uint32_t result, size_t input_rows_count) const override {
514
96
        DCHECK_GE(arguments.size(), 1);
515
96
        DCHECK_LE(arguments.size(), 4);
516
517
96
        char upper = DEFAULT_UPPER_MASK, lower = DEFAULT_LOWER_MASK, number = DEFAULT_NUMBER_MASK;
518
519
96
        auto res = ColumnString::create();
520
96
        const auto& source_column =
521
96
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
522
523
96
        if (arguments.size() > 1) {
524
38
            const auto& col = *block.get_by_position(arguments[1]).column;
525
38
            auto string_ref = col.get_data_at(0);
526
38
            if (string_ref.size > 0) {
527
38
                upper = *string_ref.data;
528
38
            }
529
38
        }
530
531
96
        if (arguments.size() > 2) {
532
22
            const auto& col = *block.get_by_position(arguments[2]).column;
533
22
            auto string_ref = col.get_data_at(0);
534
22
            if (string_ref.size > 0) {
535
22
                lower = *string_ref.data;
536
22
            }
537
22
        }
538
539
96
        if (arguments.size() > 3) {
540
12
            const auto& col = *block.get_by_position(arguments[3]).column;
541
12
            auto string_ref = col.get_data_at(0);
542
12
            if (string_ref.size > 0) {
543
12
                number = *string_ref.data;
544
12
            }
545
12
        }
546
547
96
        if (arguments.size() > 4) {
548
0
            return Status::InvalidArgument(
549
0
                    fmt::format("too many arguments for function {}", get_name()));
550
0
        }
551
552
96
        vector_mask(source_column, *res, upper, lower, number);
553
554
96
        block.get_by_position(result).column = std::move(res);
555
556
96
        return Status::OK();
557
96
    }
558
    friend class FunctionMaskPartial<true>;
559
    friend class FunctionMaskPartial<false>;
560
561
private:
562
    static void vector_mask(const ColumnString& source, ColumnString& result, const char upper,
563
166
                            const char lower, const char number) {
564
166
        result.get_chars().resize(source.get_chars().size());
565
166
        result.get_offsets().resize(source.get_offsets().size());
566
166
        memcpy_small_allow_read_write_overflow15(
567
166
                result.get_offsets().data(), source.get_offsets().data(),
568
166
                source.get_offsets().size() * sizeof(ColumnString::Offset));
569
570
166
        const unsigned char* src = source.get_chars().data();
571
166
        const size_t size = source.get_chars().size();
572
166
        unsigned char* res = result.get_chars().data();
573
166
        mask(src, size, upper, lower, number, res);
574
166
    }
575
576
    static void mask(const unsigned char* __restrict src, const size_t size,
577
                     const unsigned char upper, const unsigned char lower,
578
314
                     const unsigned char number, unsigned char* __restrict res) {
579
3.67k
        for (size_t i = 0; i != size; ++i) {
580
3.36k
            auto c = src[i];
581
3.36k
            if (c >= 'A' && c <= 'Z') {
582
260
                res[i] = upper;
583
3.10k
            } else if (c >= 'a' && c <= 'z') {
584
1.81k
                res[i] = lower;
585
1.81k
            } else if (c >= '0' && c <= '9') {
586
1.12k
                res[i] = number;
587
1.12k
            } else {
588
158
                res[i] = c;
589
158
            }
590
3.36k
        }
591
314
    }
592
};
593
594
template <bool Reverse>
595
class FunctionMaskPartial : public IFunction {
596
public:
597
    static constexpr auto name = Reverse ? "mask_last_n" : "mask_first_n";
598
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE8get_nameB5cxx11Ev
599
118
    static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); }
_ZN5doris19FunctionMaskPartialILb1EE6createEv
Line
Count
Source
599
59
    static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); }
_ZN5doris19FunctionMaskPartialILb0EE6createEv
Line
Count
Source
599
59
    static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); }
600
601
100
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
602
100
        return std::make_shared<DataTypeString>();
603
100
    }
_ZNK5doris19FunctionMaskPartialILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
601
50
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
602
50
        return std::make_shared<DataTypeString>();
603
50
    }
_ZNK5doris19FunctionMaskPartialILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Line
Count
Source
601
50
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
602
50
        return std::make_shared<DataTypeString>();
603
50
    }
604
605
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE23get_number_of_argumentsEv
606
607
102
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionMaskPartialILb1EE11is_variadicEv
Line
Count
Source
607
51
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionMaskPartialILb0EE11is_variadicEv
Line
Count
Source
607
51
    bool is_variadic() const override { return true; }
608
609
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
610
208
                        uint32_t result, size_t input_rows_count) const override {
611
208
        auto res = ColumnString::create();
612
208
        auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
613
208
        const auto& source_column = assert_cast<const ColumnString&>(*col);
614
615
208
        if (arguments.size() == 1) { // no 2nd arg, just mask all
616
70
            FunctionMask::vector_mask(source_column, *res, FunctionMask::DEFAULT_UPPER_MASK,
617
70
                                      FunctionMask::DEFAULT_LOWER_MASK,
618
70
                                      FunctionMask::DEFAULT_NUMBER_MASK);
619
138
        } else {
620
138
            const auto& [col_2nd, is_const] =
621
138
                    unpack_if_const(block.get_by_position(arguments[1]).column);
622
623
138
            const auto& col_n = assert_cast<const ColumnInt32&>(*col_2nd);
624
625
138
            if (is_const) {
626
64
                RETURN_IF_ERROR(vector<true>(source_column, col_n, *res));
627
74
            } else {
628
74
                RETURN_IF_ERROR(vector<false>(source_column, col_n, *res));
629
74
            }
630
138
        }
631
632
198
        block.get_by_position(result).column = std::move(res);
633
634
198
        return Status::OK();
635
208
    }
_ZNK5doris19FunctionMaskPartialILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
610
104
                        uint32_t result, size_t input_rows_count) const override {
611
104
        auto res = ColumnString::create();
612
104
        auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
613
104
        const auto& source_column = assert_cast<const ColumnString&>(*col);
614
615
104
        if (arguments.size() == 1) { // no 2nd arg, just mask all
616
35
            FunctionMask::vector_mask(source_column, *res, FunctionMask::DEFAULT_UPPER_MASK,
617
35
                                      FunctionMask::DEFAULT_LOWER_MASK,
618
35
                                      FunctionMask::DEFAULT_NUMBER_MASK);
619
69
        } else {
620
69
            const auto& [col_2nd, is_const] =
621
69
                    unpack_if_const(block.get_by_position(arguments[1]).column);
622
623
69
            const auto& col_n = assert_cast<const ColumnInt32&>(*col_2nd);
624
625
69
            if (is_const) {
626
32
                RETURN_IF_ERROR(vector<true>(source_column, col_n, *res));
627
37
            } else {
628
37
                RETURN_IF_ERROR(vector<false>(source_column, col_n, *res));
629
37
            }
630
69
        }
631
632
99
        block.get_by_position(result).column = std::move(res);
633
634
99
        return Status::OK();
635
104
    }
_ZNK5doris19FunctionMaskPartialILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
610
104
                        uint32_t result, size_t input_rows_count) const override {
611
104
        auto res = ColumnString::create();
612
104
        auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
613
104
        const auto& source_column = assert_cast<const ColumnString&>(*col);
614
615
104
        if (arguments.size() == 1) { // no 2nd arg, just mask all
616
35
            FunctionMask::vector_mask(source_column, *res, FunctionMask::DEFAULT_UPPER_MASK,
617
35
                                      FunctionMask::DEFAULT_LOWER_MASK,
618
35
                                      FunctionMask::DEFAULT_NUMBER_MASK);
619
69
        } else {
620
69
            const auto& [col_2nd, is_const] =
621
69
                    unpack_if_const(block.get_by_position(arguments[1]).column);
622
623
69
            const auto& col_n = assert_cast<const ColumnInt32&>(*col_2nd);
624
625
69
            if (is_const) {
626
32
                RETURN_IF_ERROR(vector<true>(source_column, col_n, *res));
627
37
            } else {
628
37
                RETURN_IF_ERROR(vector<false>(source_column, col_n, *res));
629
37
            }
630
69
        }
631
632
99
        block.get_by_position(result).column = std::move(res);
633
634
99
        return Status::OK();
635
104
    }
636
637
private:
638
    template <bool is_const>
639
138
    static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) {
640
138
        const auto num_rows = src.size();
641
138
        const auto* chars = src.get_chars().data();
642
138
        const auto* offsets = src.get_offsets().data();
643
138
        result.get_chars().resize(src.get_chars().size());
644
138
        result.get_offsets().resize(src.get_offsets().size());
645
138
        memcpy_small_allow_read_write_overflow15(
646
138
                result.get_offsets().data(), src.get_offsets().data(),
647
138
                src.get_offsets().size() * sizeof(ColumnString::Offset));
648
138
        auto* res = result.get_chars().data();
649
650
138
        const auto& col_n_data = col_n.get_data();
651
652
286
        for (ssize_t i = 0; i != num_rows; ++i) {
653
158
            auto offset = offsets[i - 1];
654
158
            int len = offsets[i] - offset;
655
158
            const int n = col_n_data[index_check_const<is_const>(i)];
656
657
158
            if (n < 0) [[unlikely]] {
658
10
                return Status::InvalidArgument(
659
10
                        "function {} only accept non-negative input for 2nd argument but got {}",
660
10
                        name, n);
661
10
            }
662
663
148
            if constexpr (Reverse) {
664
74
                auto start = std::max(len - n, 0);
665
74
                if (start > 0) {
666
48
                    memcpy(&res[offset], &chars[offset], start);
667
48
                }
668
74
                offset += start;
669
74
            } else {
670
74
                if (n < len) {
671
48
                    memcpy(&res[offset + n], &chars[offset + n], len - n);
672
48
                }
673
74
            }
674
675
148
            len = std::min(n, len);
676
148
            FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK,
677
148
                               FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK,
678
148
                               &res[offset]);
679
148
        }
680
681
128
        return Status::OK();
682
138
    }
_ZN5doris19FunctionMaskPartialILb1EE6vectorILb1EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
Line
Count
Source
639
32
    static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) {
640
32
        const auto num_rows = src.size();
641
32
        const auto* chars = src.get_chars().data();
642
32
        const auto* offsets = src.get_offsets().data();
643
32
        result.get_chars().resize(src.get_chars().size());
644
32
        result.get_offsets().resize(src.get_offsets().size());
645
32
        memcpy_small_allow_read_write_overflow15(
646
32
                result.get_offsets().data(), src.get_offsets().data(),
647
32
                src.get_offsets().size() * sizeof(ColumnString::Offset));
648
32
        auto* res = result.get_chars().data();
649
650
32
        const auto& col_n_data = col_n.get_data();
651
652
72
        for (ssize_t i = 0; i != num_rows; ++i) {
653
40
            auto offset = offsets[i - 1];
654
40
            int len = offsets[i] - offset;
655
40
            const int n = col_n_data[index_check_const<is_const>(i)];
656
657
40
            if (n < 0) [[unlikely]] {
658
0
                return Status::InvalidArgument(
659
0
                        "function {} only accept non-negative input for 2nd argument but got {}",
660
0
                        name, n);
661
0
            }
662
663
40
            if constexpr (Reverse) {
664
40
                auto start = std::max(len - n, 0);
665
40
                if (start > 0) {
666
20
                    memcpy(&res[offset], &chars[offset], start);
667
20
                }
668
40
                offset += start;
669
            } else {
670
                if (n < len) {
671
                    memcpy(&res[offset + n], &chars[offset + n], len - n);
672
                }
673
            }
674
675
40
            len = std::min(n, len);
676
40
            FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK,
677
40
                               FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK,
678
40
                               &res[offset]);
679
40
        }
680
681
32
        return Status::OK();
682
32
    }
_ZN5doris19FunctionMaskPartialILb1EE6vectorILb0EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
Line
Count
Source
639
37
    static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) {
640
37
        const auto num_rows = src.size();
641
37
        const auto* chars = src.get_chars().data();
642
37
        const auto* offsets = src.get_offsets().data();
643
37
        result.get_chars().resize(src.get_chars().size());
644
37
        result.get_offsets().resize(src.get_offsets().size());
645
37
        memcpy_small_allow_read_write_overflow15(
646
37
                result.get_offsets().data(), src.get_offsets().data(),
647
37
                src.get_offsets().size() * sizeof(ColumnString::Offset));
648
37
        auto* res = result.get_chars().data();
649
650
37
        const auto& col_n_data = col_n.get_data();
651
652
71
        for (ssize_t i = 0; i != num_rows; ++i) {
653
39
            auto offset = offsets[i - 1];
654
39
            int len = offsets[i] - offset;
655
39
            const int n = col_n_data[index_check_const<is_const>(i)];
656
657
39
            if (n < 0) [[unlikely]] {
658
5
                return Status::InvalidArgument(
659
5
                        "function {} only accept non-negative input for 2nd argument but got {}",
660
5
                        name, n);
661
5
            }
662
663
34
            if constexpr (Reverse) {
664
34
                auto start = std::max(len - n, 0);
665
34
                if (start > 0) {
666
28
                    memcpy(&res[offset], &chars[offset], start);
667
28
                }
668
34
                offset += start;
669
            } else {
670
                if (n < len) {
671
                    memcpy(&res[offset + n], &chars[offset + n], len - n);
672
                }
673
            }
674
675
34
            len = std::min(n, len);
676
34
            FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK,
677
34
                               FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK,
678
34
                               &res[offset]);
679
34
        }
680
681
32
        return Status::OK();
682
37
    }
_ZN5doris19FunctionMaskPartialILb0EE6vectorILb1EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
Line
Count
Source
639
32
    static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) {
640
32
        const auto num_rows = src.size();
641
32
        const auto* chars = src.get_chars().data();
642
32
        const auto* offsets = src.get_offsets().data();
643
32
        result.get_chars().resize(src.get_chars().size());
644
32
        result.get_offsets().resize(src.get_offsets().size());
645
32
        memcpy_small_allow_read_write_overflow15(
646
32
                result.get_offsets().data(), src.get_offsets().data(),
647
32
                src.get_offsets().size() * sizeof(ColumnString::Offset));
648
32
        auto* res = result.get_chars().data();
649
650
32
        const auto& col_n_data = col_n.get_data();
651
652
72
        for (ssize_t i = 0; i != num_rows; ++i) {
653
40
            auto offset = offsets[i - 1];
654
40
            int len = offsets[i] - offset;
655
40
            const int n = col_n_data[index_check_const<is_const>(i)];
656
657
40
            if (n < 0) [[unlikely]] {
658
0
                return Status::InvalidArgument(
659
0
                        "function {} only accept non-negative input for 2nd argument but got {}",
660
0
                        name, n);
661
0
            }
662
663
            if constexpr (Reverse) {
664
                auto start = std::max(len - n, 0);
665
                if (start > 0) {
666
                    memcpy(&res[offset], &chars[offset], start);
667
                }
668
                offset += start;
669
40
            } else {
670
40
                if (n < len) {
671
20
                    memcpy(&res[offset + n], &chars[offset + n], len - n);
672
20
                }
673
40
            }
674
675
40
            len = std::min(n, len);
676
40
            FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK,
677
40
                               FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK,
678
40
                               &res[offset]);
679
40
        }
680
681
32
        return Status::OK();
682
32
    }
_ZN5doris19FunctionMaskPartialILb0EE6vectorILb0EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
Line
Count
Source
639
37
    static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) {
640
37
        const auto num_rows = src.size();
641
37
        const auto* chars = src.get_chars().data();
642
37
        const auto* offsets = src.get_offsets().data();
643
37
        result.get_chars().resize(src.get_chars().size());
644
37
        result.get_offsets().resize(src.get_offsets().size());
645
37
        memcpy_small_allow_read_write_overflow15(
646
37
                result.get_offsets().data(), src.get_offsets().data(),
647
37
                src.get_offsets().size() * sizeof(ColumnString::Offset));
648
37
        auto* res = result.get_chars().data();
649
650
37
        const auto& col_n_data = col_n.get_data();
651
652
71
        for (ssize_t i = 0; i != num_rows; ++i) {
653
39
            auto offset = offsets[i - 1];
654
39
            int len = offsets[i] - offset;
655
39
            const int n = col_n_data[index_check_const<is_const>(i)];
656
657
39
            if (n < 0) [[unlikely]] {
658
5
                return Status::InvalidArgument(
659
5
                        "function {} only accept non-negative input for 2nd argument but got {}",
660
5
                        name, n);
661
5
            }
662
663
            if constexpr (Reverse) {
664
                auto start = std::max(len - n, 0);
665
                if (start > 0) {
666
                    memcpy(&res[offset], &chars[offset], start);
667
                }
668
                offset += start;
669
34
            } else {
670
34
                if (n < len) {
671
28
                    memcpy(&res[offset + n], &chars[offset + n], len - n);
672
28
                }
673
34
            }
674
675
34
            len = std::min(n, len);
676
34
            FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK,
677
34
                               FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK,
678
34
                               &res[offset]);
679
34
        }
680
681
32
        return Status::OK();
682
37
    }
683
};
684
685
class FunctionLeft : public IFunction {
686
public:
687
    static constexpr auto name = "left";
688
200
    static FunctionPtr create() { return std::make_shared<FunctionLeft>(); }
689
1
    String get_name() const override { return name; }
690
191
    size_t get_number_of_arguments() const override { return 2; }
691
191
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
692
191
        return std::make_shared<DataTypeString>();
693
191
    }
694
695
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
696
206
                        uint32_t result, size_t input_rows_count) const override {
697
206
        DCHECK_EQ(arguments.size(), 2);
698
206
        auto res = ColumnString::create();
699
206
        bool col_const[2];
700
206
        ColumnPtr argument_columns[2];
701
618
        for (int i = 0; i < 2; ++i) {
702
412
            std::tie(argument_columns[i], col_const[i]) =
703
412
                    unpack_if_const(block.get_by_position(arguments[i]).column);
704
412
        }
705
706
206
        const auto& str_col = assert_cast<const ColumnString&>(*argument_columns[0]);
707
206
        const auto& len_col = assert_cast<const ColumnInt32&>(*argument_columns[1]);
708
206
        const auto is_ascii = str_col.is_ascii();
709
710
206
        std::visit(
711
206
                [&](auto is_ascii, auto str_const, auto len_const) {
712
206
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
206
                                                             input_rows_count);
714
206
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
711
10
                [&](auto is_ascii, auto str_const, auto len_const) {
712
10
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
10
                                                             input_rows_count);
714
10
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
711
8
                [&](auto is_ascii, auto str_const, auto len_const) {
712
8
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
8
                                                             input_rows_count);
714
8
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
711
8
                [&](auto is_ascii, auto str_const, auto len_const) {
712
8
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
8
                                                             input_rows_count);
714
8
                },
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
711
68
                [&](auto is_ascii, auto str_const, auto len_const) {
712
68
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
68
                                                             input_rows_count);
714
68
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
711
76
                [&](auto is_ascii, auto str_const, auto len_const) {
712
76
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
76
                                                             input_rows_count);
714
76
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
711
36
                [&](auto is_ascii, auto str_const, auto len_const) {
712
36
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
36
                                                             input_rows_count);
714
36
                },
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
715
206
                make_bool_variant(is_ascii), make_bool_variant(col_const[0]),
716
206
                make_bool_variant(col_const[1]));
717
718
206
        block.get_by_position(result).column = std::move(res);
719
206
        return Status::OK();
720
206
    }
721
722
    template <bool is_ascii, bool str_const, bool len_const>
723
    static void _execute(const ColumnString& str_col, const ColumnInt32& len_col, ColumnString& res,
724
206
                         size_t size) {
725
206
        auto& res_chars = res.get_chars();
726
206
        auto& res_offsets = res.get_offsets();
727
206
        res_offsets.resize(size);
728
206
        const auto& len_data = len_col.get_data();
729
730
206
        if constexpr (str_const) {
731
44
            res_chars.reserve(size * (str_col.get_chars().size()));
732
162
        } else {
733
162
            res_chars.reserve(str_col.get_chars().size());
734
162
        }
735
736
574
        for (int i = 0; i < size; ++i) {
737
368
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
368
            int len = len_data[index_check_const<len_const>(i)];
739
368
            if (len <= 0 || str.empty()) {
740
59
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
59
                continue;
742
59
            }
743
744
309
            const char* begin = str.begin();
745
309
            const char* p = begin;
746
747
309
            if constexpr (is_ascii) {
748
254
                p = begin + std::min(len, static_cast<int>(str.size));
749
254
            } else {
750
55
                const char* end = str.end();
751
402
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
347
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
347
                }
754
55
            }
755
756
309
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
309
                                                                    res_offsets);
758
309
        }
759
206
    }
_ZN5doris12FunctionLeft8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
10
                         size_t size) {
725
10
        auto& res_chars = res.get_chars();
726
10
        auto& res_offsets = res.get_offsets();
727
10
        res_offsets.resize(size);
728
10
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
10
        } else {
733
10
            res_chars.reserve(str_col.get_chars().size());
734
10
        }
735
736
74
        for (int i = 0; i < size; ++i) {
737
64
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
64
            int len = len_data[index_check_const<len_const>(i)];
739
64
            if (len <= 0 || str.empty()) {
740
23
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
23
                continue;
742
23
            }
743
744
41
            const char* begin = str.begin();
745
41
            const char* p = begin;
746
747
            if constexpr (is_ascii) {
748
                p = begin + std::min(len, static_cast<int>(str.size));
749
41
            } else {
750
41
                const char* end = str.end();
751
320
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
279
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
279
                }
754
41
            }
755
756
41
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
41
                                                                    res_offsets);
758
41
        }
759
10
    }
_ZN5doris12FunctionLeft8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
8
                         size_t size) {
725
8
        auto& res_chars = res.get_chars();
726
8
        auto& res_offsets = res.get_offsets();
727
8
        res_offsets.resize(size);
728
8
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
8
        } else {
733
8
            res_chars.reserve(str_col.get_chars().size());
734
8
        }
735
736
16
        for (int i = 0; i < size; ++i) {
737
8
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
8
            int len = len_data[index_check_const<len_const>(i)];
739
8
            if (len <= 0 || str.empty()) {
740
1
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
1
                continue;
742
1
            }
743
744
7
            const char* begin = str.begin();
745
7
            const char* p = begin;
746
747
            if constexpr (is_ascii) {
748
                p = begin + std::min(len, static_cast<int>(str.size));
749
7
            } else {
750
7
                const char* end = str.end();
751
41
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
34
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
34
                }
754
7
            }
755
756
7
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
7
                                                                    res_offsets);
758
7
        }
759
8
    }
_ZN5doris12FunctionLeft8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
8
                         size_t size) {
725
8
        auto& res_chars = res.get_chars();
726
8
        auto& res_offsets = res.get_offsets();
727
8
        res_offsets.resize(size);
728
8
        const auto& len_data = len_col.get_data();
729
730
8
        if constexpr (str_const) {
731
8
            res_chars.reserve(size * (str_col.get_chars().size()));
732
        } else {
733
            res_chars.reserve(str_col.get_chars().size());
734
        }
735
736
16
        for (int i = 0; i < size; ++i) {
737
8
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
8
            int len = len_data[index_check_const<len_const>(i)];
739
8
            if (len <= 0 || str.empty()) {
740
1
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
1
                continue;
742
1
            }
743
744
7
            const char* begin = str.begin();
745
7
            const char* p = begin;
746
747
            if constexpr (is_ascii) {
748
                p = begin + std::min(len, static_cast<int>(str.size));
749
7
            } else {
750
7
                const char* end = str.end();
751
41
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
34
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
34
                }
754
7
            }
755
756
7
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
7
                                                                    res_offsets);
758
7
        }
759
8
    }
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
_ZN5doris12FunctionLeft8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
68
                         size_t size) {
725
68
        auto& res_chars = res.get_chars();
726
68
        auto& res_offsets = res.get_offsets();
727
68
        res_offsets.resize(size);
728
68
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
68
        } else {
733
68
            res_chars.reserve(str_col.get_chars().size());
734
68
        }
735
736
204
        for (int i = 0; i < size; ++i) {
737
136
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
136
            int len = len_data[index_check_const<len_const>(i)];
739
136
            if (len <= 0 || str.empty()) {
740
14
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
14
                continue;
742
14
            }
743
744
122
            const char* begin = str.begin();
745
122
            const char* p = begin;
746
747
122
            if constexpr (is_ascii) {
748
122
                p = begin + std::min(len, static_cast<int>(str.size));
749
            } else {
750
                const char* end = str.end();
751
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
                }
754
            }
755
756
122
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
122
                                                                    res_offsets);
758
122
        }
759
68
    }
_ZN5doris12FunctionLeft8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
76
                         size_t size) {
725
76
        auto& res_chars = res.get_chars();
726
76
        auto& res_offsets = res.get_offsets();
727
76
        res_offsets.resize(size);
728
76
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
76
        } else {
733
76
            res_chars.reserve(str_col.get_chars().size());
734
76
        }
735
736
192
        for (int i = 0; i < size; ++i) {
737
116
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
116
            int len = len_data[index_check_const<len_const>(i)];
739
116
            if (len <= 0 || str.empty()) {
740
10
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
10
                continue;
742
10
            }
743
744
106
            const char* begin = str.begin();
745
106
            const char* p = begin;
746
747
106
            if constexpr (is_ascii) {
748
106
                p = begin + std::min(len, static_cast<int>(str.size));
749
            } else {
750
                const char* end = str.end();
751
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
                }
754
            }
755
756
106
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
106
                                                                    res_offsets);
758
106
        }
759
76
    }
_ZN5doris12FunctionLeft8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
36
                         size_t size) {
725
36
        auto& res_chars = res.get_chars();
726
36
        auto& res_offsets = res.get_offsets();
727
36
        res_offsets.resize(size);
728
36
        const auto& len_data = len_col.get_data();
729
730
36
        if constexpr (str_const) {
731
36
            res_chars.reserve(size * (str_col.get_chars().size()));
732
        } else {
733
            res_chars.reserve(str_col.get_chars().size());
734
        }
735
736
72
        for (int i = 0; i < size; ++i) {
737
36
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
36
            int len = len_data[index_check_const<len_const>(i)];
739
36
            if (len <= 0 || str.empty()) {
740
10
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
10
                continue;
742
10
            }
743
744
26
            const char* begin = str.begin();
745
26
            const char* p = begin;
746
747
26
            if constexpr (is_ascii) {
748
26
                p = begin + std::min(len, static_cast<int>(str.size));
749
            } else {
750
                const char* end = str.end();
751
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
                }
754
            }
755
756
26
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
26
                                                                    res_offsets);
758
26
        }
759
36
    }
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
760
};
761
762
class FunctionRight : public IFunction {
763
public:
764
    static constexpr auto name = "right";
765
116
    static FunctionPtr create() { return std::make_shared<FunctionRight>(); }
766
1
    String get_name() const override { return name; }
767
107
    size_t get_number_of_arguments() const override { return 2; }
768
107
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
769
107
        return std::make_shared<DataTypeString>();
770
107
    }
771
772
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
773
137
                        uint32_t result, size_t input_rows_count) const override {
774
137
        auto int_type = std::make_shared<DataTypeInt32>();
775
137
        auto params1 = ColumnInt32::create(input_rows_count);
776
137
        auto params2 = ColumnInt32::create(input_rows_count);
777
137
        size_t num_columns_without_result = block.columns();
778
779
        // params1 = max(arg[1], -len(arg))
780
137
        auto& index_data = params1->get_data();
781
137
        auto& strlen_data = params2->get_data();
782
783
137
        auto str_col =
784
137
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
785
137
        const auto* str_column = assert_cast<const ColumnString*>(str_col.get());
786
137
        auto pos_col =
787
137
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
788
137
        const auto& pos_data = assert_cast<const ColumnInt32*>(pos_col.get())->get_data();
789
790
400
        for (int i = 0; i < input_rows_count; ++i) {
791
263
            auto str = str_column->get_data_at(i);
792
263
            strlen_data[i] = simd::VStringFunctions::get_char_len(str.data, str.size);
793
263
        }
794
795
400
        for (int i = 0; i < input_rows_count; ++i) {
796
263
            index_data[i] = std::max(-pos_data[i], -strlen_data[i]);
797
263
        }
798
799
137
        block.insert({std::move(params1), int_type, "index"});
800
137
        block.insert({std::move(params2), int_type, "strlen"});
801
802
137
        ColumnNumbers temp_arguments(3);
803
137
        temp_arguments[0] = arguments[0];
804
137
        temp_arguments[1] = num_columns_without_result;
805
137
        temp_arguments[2] = num_columns_without_result + 1;
806
137
        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
807
137
        return Status::OK();
808
137
    }
809
};
810
811
struct NullOrEmptyImpl {
812
0
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeUInt8>()}; }
813
814
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
815
85
                          uint32_t result, size_t input_rows_count, bool reverse) {
816
85
        auto res_map = ColumnUInt8::create(input_rows_count, 0);
817
818
85
        auto column = block.get_by_position(arguments[0]).column;
819
85
        if (auto* nullable = check_and_get_column<const ColumnNullable>(*column)) {
820
35
            column = nullable->get_nested_column_ptr();
821
35
            VectorizedUtils::update_null_map(res_map->get_data(), nullable->get_null_map_data());
822
35
        }
823
85
        auto str_col = assert_cast<const ColumnString*>(column.get());
824
85
        const auto& offsets = str_col->get_offsets();
825
826
85
        auto& res_map_data = res_map->get_data();
827
248
        for (int i = 0; i < input_rows_count; ++i) {
828
163
            int size = offsets[i] - offsets[i - 1];
829
163
            res_map_data[i] |= (size == 0);
830
163
        }
831
85
        if (reverse) {
832
114
            for (int i = 0; i < input_rows_count; ++i) {
833
74
                res_map_data[i] = !res_map_data[i];
834
74
            }
835
40
        }
836
837
85
        block.replace_by_position(result, std::move(res_map));
838
85
        return Status::OK();
839
85
    }
840
};
841
842
class FunctionNullOrEmpty : public IFunction {
843
public:
844
    static constexpr auto name = "null_or_empty";
845
41
    static FunctionPtr create() { return std::make_shared<FunctionNullOrEmpty>(); }
846
1
    String get_name() const override { return name; }
847
32
    size_t get_number_of_arguments() const override { return 1; }
848
849
32
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
850
32
        return std::make_shared<DataTypeUInt8>();
851
32
    }
852
853
77
    bool use_default_implementation_for_nulls() const override { return false; }
854
855
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
856
45
                        uint32_t result, size_t input_rows_count) const override {
857
45
        RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result,
858
45
                                                 input_rows_count, false));
859
45
        return Status::OK();
860
45
    }
861
};
862
863
class FunctionNotNullOrEmpty : public IFunction {
864
public:
865
    static constexpr auto name = "not_null_or_empty";
866
37
    static FunctionPtr create() { return std::make_shared<FunctionNotNullOrEmpty>(); }
867
1
    String get_name() const override { return name; }
868
28
    size_t get_number_of_arguments() const override { return 1; }
869
870
28
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
871
28
        return std::make_shared<DataTypeUInt8>();
872
28
    }
873
874
68
    bool use_default_implementation_for_nulls() const override { return false; }
875
876
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
877
40
                        uint32_t result, size_t input_rows_count) const override {
878
40
        RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result,
879
40
                                                 input_rows_count, true));
880
40
        return Status::OK();
881
40
    }
882
};
883
884
class FunctionStringConcat : public IFunction {
885
public:
886
    struct ConcatState {
887
        bool use_state = false;
888
        std::string tail;
889
    };
890
891
    static constexpr auto name = "concat";
892
1.42k
    static FunctionPtr create() { return std::make_shared<FunctionStringConcat>(); }
893
0
    String get_name() const override { return name; }
894
0
    size_t get_number_of_arguments() const override { return 0; }
895
1.41k
    bool is_variadic() const override { return true; }
896
897
1.41k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
898
1.41k
        return std::make_shared<DataTypeString>();
899
1.41k
    }
900
901
4.52k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
902
4.52k
        if (scope == FunctionContext::THREAD_LOCAL) {
903
3.10k
            return Status::OK();
904
3.10k
        }
905
1.41k
        std::shared_ptr<ConcatState> state = std::make_shared<ConcatState>();
906
907
1.41k
        context->set_function_state(scope, state);
908
909
1.41k
        state->use_state = true;
910
911
        // Optimize function calls like this:
912
        // concat(col, "123", "abc", "456") -> tail = "123abc456"
913
2.45k
        for (size_t i = 1; i < context->get_num_args(); i++) {
914
2.19k
            const auto* column_string = context->get_constant_col(i);
915
2.19k
            if (column_string == nullptr) {
916
1.13k
                state->use_state = false;
917
1.13k
                return IFunction::open(context, scope);
918
1.13k
            }
919
1.06k
            auto string_vale = column_string->column_ptr->get_data_at(0);
920
1.06k
            if (string_vale.data == nullptr) {
921
                // For concat(col, null), it is handled by default_implementation_for_nulls
922
28
                state->use_state = false;
923
28
                return IFunction::open(context, scope);
924
28
            }
925
926
1.03k
            state->tail.append(string_vale.begin(), string_vale.size);
927
1.03k
        }
928
929
        // The reserve is used here to allow the usage of memcpy_small_allow_read_write_overflow15 below.
930
257
        state->tail.reserve(state->tail.size() + 16);
931
932
257
        return IFunction::open(context, scope);
933
1.41k
    }
934
935
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
936
1.52k
                        uint32_t result, size_t input_rows_count) const override {
937
1.52k
        DCHECK_GE(arguments.size(), 1);
938
939
1.52k
        if (arguments.size() == 1) {
940
29
            block.get_by_position(result).column = block.get_by_position(arguments[0]).column;
941
29
            return Status::OK();
942
29
        }
943
1.49k
        auto* concat_state = reinterpret_cast<ConcatState*>(
944
1.49k
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
945
1.49k
        if (!concat_state) {
946
0
            return Status::RuntimeError("funciton context for function '{}' must have ConcatState;",
947
0
                                        get_name());
948
0
        }
949
1.49k
        if (concat_state->use_state) {
950
246
            const auto& [col, is_const] =
951
246
                    unpack_if_const(block.get_by_position(arguments[0]).column);
952
246
            const auto* col_str = assert_cast<const ColumnString*>(col.get());
953
246
            if (is_const) {
954
0
                return execute_const<true>(concat_state, block, col_str, result, input_rows_count);
955
246
            } else {
956
246
                return execute_const<false>(concat_state, block, col_str, result, input_rows_count);
957
246
            }
958
959
1.25k
        } else {
960
1.25k
            return execute_vecotr(block, arguments, result, input_rows_count);
961
1.25k
        }
962
1.49k
    }
963
964
    Status execute_vecotr(Block& block, const ColumnNumbers& arguments, uint32_t result,
965
1.25k
                          size_t input_rows_count) const {
966
1.25k
        int argument_size = arguments.size();
967
1.25k
        std::vector<ColumnPtr> argument_columns(argument_size);
968
969
1.25k
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
970
1.25k
        std::vector<const ColumnString::Chars*> chars_list(argument_size);
971
1.25k
        std::vector<bool> is_const_args(argument_size);
972
973
4.79k
        for (int i = 0; i < argument_size; ++i) {
974
3.54k
            const auto& [col, is_const] =
975
3.54k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
976
977
3.54k
            const auto* col_str = assert_cast<const ColumnString*>(col.get());
978
3.54k
            offsets_list[i] = &col_str->get_offsets();
979
3.54k
            chars_list[i] = &col_str->get_chars();
980
3.54k
            is_const_args[i] = is_const;
981
3.54k
        }
982
983
1.25k
        auto res = ColumnString::create();
984
1.25k
        auto& res_data = res->get_chars();
985
1.25k
        auto& res_offset = res->get_offsets();
986
987
1.25k
        res_offset.resize(input_rows_count);
988
1.25k
        size_t res_reserve_size = 0;
989
4.79k
        for (size_t i = 0; i < argument_size; ++i) {
990
3.54k
            if (is_const_args[i]) {
991
1.17k
                res_reserve_size += (*offsets_list[i])[0] * input_rows_count;
992
2.37k
            } else {
993
2.37k
                res_reserve_size += (*offsets_list[i])[input_rows_count - 1];
994
2.37k
            }
995
3.54k
        }
996
997
1.25k
        ColumnString::check_chars_length(res_reserve_size, 0);
998
999
1.25k
        res_data.resize(res_reserve_size);
1000
1001
1.25k
        auto* data = res_data.data();
1002
1.25k
        size_t dst_offset = 0;
1003
1004
49.6k
        for (size_t i = 0; i < input_rows_count; ++i) {
1005
148k
            for (size_t j = 0; j < argument_size; ++j) {
1006
100k
                const auto& current_offsets = *offsets_list[j];
1007
100k
                const auto& current_chars = *chars_list[j];
1008
100k
                auto idx = index_check_const(i, is_const_args[j]);
1009
100k
                const auto size = current_offsets[idx] - current_offsets[idx - 1];
1010
100k
                if (size > 0) {
1011
97.9k
                    memcpy_small_allow_read_write_overflow15(
1012
97.9k
                            data + dst_offset, current_chars.data() + current_offsets[idx - 1],
1013
97.9k
                            size);
1014
97.9k
                    dst_offset += size;
1015
97.9k
                }
1016
100k
            }
1017
48.3k
            res_offset[i] = dst_offset;
1018
48.3k
        }
1019
1020
1.25k
        block.get_by_position(result).column = std::move(res);
1021
1.25k
        return Status::OK();
1022
1.25k
    }
1023
1024
    template <bool is_const>
1025
    Status execute_const(ConcatState* concat_state, Block& block, const ColumnString* col_str,
1026
246
                         uint32_t result, size_t input_rows_count) const {
1027
        // using tail optimize
1028
1029
246
        auto res = ColumnString::create();
1030
246
        auto& res_data = res->get_chars();
1031
246
        auto& res_offset = res->get_offsets();
1032
246
        res_offset.resize(input_rows_count);
1033
1034
246
        size_t res_reserve_size = 0;
1035
246
        if constexpr (is_const) {
1036
0
            res_reserve_size = col_str->get_offsets()[0] * input_rows_count;
1037
246
        } else {
1038
246
            res_reserve_size = col_str->get_offsets()[input_rows_count - 1];
1039
246
        }
1040
246
        res_reserve_size += concat_state->tail.size() * input_rows_count;
1041
1042
246
        ColumnString::check_chars_length(res_reserve_size, 0);
1043
246
        res_data.resize(res_reserve_size);
1044
1045
246
        const auto& tail = concat_state->tail;
1046
246
        auto* data = res_data.data();
1047
246
        size_t dst_offset = 0;
1048
1049
827
        for (size_t i = 0; i < input_rows_count; ++i) {
1050
581
            const auto idx = index_check_const<is_const>(i);
1051
581
            StringRef str_val = col_str->get_data_at(idx);
1052
            // copy column
1053
581
            memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size);
1054
581
            dst_offset += str_val.size;
1055
            // copy tail
1056
581
            memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size());
1057
581
            dst_offset += tail.size();
1058
581
            res_offset[i] = dst_offset;
1059
581
        }
1060
246
        block.get_by_position(result).column = std::move(res);
1061
246
        return Status::OK();
1062
246
    }
Unexecuted instantiation: _ZNK5doris20FunctionStringConcat13execute_constILb1EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm
_ZNK5doris20FunctionStringConcat13execute_constILb0EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm
Line
Count
Source
1026
246
                         uint32_t result, size_t input_rows_count) const {
1027
        // using tail optimize
1028
1029
246
        auto res = ColumnString::create();
1030
246
        auto& res_data = res->get_chars();
1031
246
        auto& res_offset = res->get_offsets();
1032
246
        res_offset.resize(input_rows_count);
1033
1034
246
        size_t res_reserve_size = 0;
1035
        if constexpr (is_const) {
1036
            res_reserve_size = col_str->get_offsets()[0] * input_rows_count;
1037
246
        } else {
1038
246
            res_reserve_size = col_str->get_offsets()[input_rows_count - 1];
1039
246
        }
1040
246
        res_reserve_size += concat_state->tail.size() * input_rows_count;
1041
1042
246
        ColumnString::check_chars_length(res_reserve_size, 0);
1043
246
        res_data.resize(res_reserve_size);
1044
1045
246
        const auto& tail = concat_state->tail;
1046
246
        auto* data = res_data.data();
1047
246
        size_t dst_offset = 0;
1048
1049
827
        for (size_t i = 0; i < input_rows_count; ++i) {
1050
581
            const auto idx = index_check_const<is_const>(i);
1051
581
            StringRef str_val = col_str->get_data_at(idx);
1052
            // copy column
1053
581
            memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size);
1054
581
            dst_offset += str_val.size;
1055
            // copy tail
1056
581
            memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size());
1057
581
            dst_offset += tail.size();
1058
581
            res_offset[i] = dst_offset;
1059
581
        }
1060
246
        block.get_by_position(result).column = std::move(res);
1061
246
        return Status::OK();
1062
246
    }
1063
};
1064
1065
class FunctionStringElt : public IFunction {
1066
public:
1067
    static constexpr auto name = "elt";
1068
431
    static FunctionPtr create() { return std::make_shared<FunctionStringElt>(); }
1069
0
    String get_name() const override { return name; }
1070
0
    size_t get_number_of_arguments() const override { return 0; }
1071
423
    bool is_variadic() const override { return true; }
1072
1073
422
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1074
422
        return make_nullable(std::make_shared<DataTypeString>());
1075
422
    }
1076
856
    bool use_default_implementation_for_nulls() const override { return false; }
1077
1078
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1079
434
                        uint32_t result, size_t input_rows_count) const override {
1080
434
        int arguent_size = arguments.size();
1081
434
        int num_children = arguent_size - 1;
1082
434
        auto res = ColumnString::create();
1083
1084
434
        if (auto const_column = check_and_get_column<ColumnConst>(
1085
434
                    *block.get_by_position(arguments[0]).column)) {
1086
154
            auto data = const_column->get_data_at(0);
1087
            // return NULL, pos is null or pos < 0 or pos > num_children
1088
154
            auto is_null = data.data == nullptr;
1089
154
            auto pos = is_null ? 0 : *(Int32*)data.data;
1090
154
            is_null = pos <= 0 || pos > num_children;
1091
1092
154
            auto null_map = ColumnUInt8::create(input_rows_count, is_null);
1093
154
            if (is_null) {
1094
135
                res->insert_many_defaults(input_rows_count);
1095
135
            } else {
1096
19
                auto& target_column = block.get_by_position(arguments[pos]).column;
1097
19
                if (auto target_const_column = check_and_get_column<ColumnConst>(*target_column)) {
1098
7
                    auto target_data = target_const_column->get_data_at(0);
1099
                    // return NULL, no target data
1100
7
                    if (target_data.data == nullptr) {
1101
1
                        null_map = ColumnUInt8::create(input_rows_count, true);
1102
1
                        res->insert_many_defaults(input_rows_count);
1103
6
                    } else {
1104
6
                        res->insert_data_repeatedly(target_data.data, target_data.size,
1105
6
                                                    input_rows_count);
1106
6
                    }
1107
12
                } else if (auto target_nullable_column =
1108
12
                                   check_and_get_column<ColumnNullable>(*target_column)) {
1109
12
                    auto& target_null_map = target_nullable_column->get_null_map_data();
1110
12
                    VectorizedUtils::update_null_map(
1111
12
                            assert_cast<ColumnUInt8&>(*null_map).get_data(), target_null_map);
1112
1113
12
                    auto& target_str_column = assert_cast<const ColumnString&>(
1114
12
                            target_nullable_column->get_nested_column());
1115
12
                    res->get_chars().assign(target_str_column.get_chars().begin(),
1116
12
                                            target_str_column.get_chars().end());
1117
12
                    res->get_offsets().assign(target_str_column.get_offsets().begin(),
1118
12
                                              target_str_column.get_offsets().end());
1119
12
                } else {
1120
0
                    auto& target_str_column = assert_cast<const ColumnString&>(*target_column);
1121
0
                    res->get_chars().assign(target_str_column.get_chars().begin(),
1122
0
                                            target_str_column.get_chars().end());
1123
0
                    res->get_offsets().assign(target_str_column.get_offsets().begin(),
1124
0
                                              target_str_column.get_offsets().end());
1125
0
                }
1126
19
            }
1127
154
            block.get_by_position(result).column =
1128
154
                    ColumnNullable::create(std::move(res), std::move(null_map));
1129
280
        } else if (auto pos_null_column = check_and_get_column<ColumnNullable>(
1130
280
                           *block.get_by_position(arguments[0]).column)) {
1131
219
            auto& pos_column =
1132
219
                    assert_cast<const ColumnInt32&>(pos_null_column->get_nested_column());
1133
219
            auto& pos_null_map = pos_null_column->get_null_map_data();
1134
219
            auto null_map = ColumnUInt8::create(input_rows_count, false);
1135
219
            auto& res_null_map = assert_cast<ColumnUInt8&>(*null_map).get_data();
1136
1137
515
            for (size_t i = 0; i < input_rows_count; ++i) {
1138
296
                auto pos = pos_column.get_element(i);
1139
296
                res_null_map[i] =
1140
296
                        pos_null_map[i] || pos <= 0 || pos > num_children ||
1141
296
                        block.get_by_position(arguments[pos]).column->get_data_at(i).data ==
1142
35
                                nullptr;
1143
296
                if (res_null_map[i]) {
1144
261
                    res->insert_default();
1145
261
                } else {
1146
35
                    auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i);
1147
35
                    res->insert_data(insert_data.data, insert_data.size);
1148
35
                }
1149
296
            }
1150
219
            block.get_by_position(result).column =
1151
219
                    ColumnNullable::create(std::move(res), std::move(null_map));
1152
219
        } else {
1153
61
            auto& pos_column =
1154
61
                    assert_cast<const ColumnInt32&>(*block.get_by_position(arguments[0]).column);
1155
61
            auto null_map = ColumnUInt8::create(input_rows_count, false);
1156
61
            auto& res_null_map = assert_cast<ColumnUInt8&>(*null_map).get_data();
1157
1158
138
            for (size_t i = 0; i < input_rows_count; ++i) {
1159
77
                auto pos = pos_column.get_element(i);
1160
77
                res_null_map[i] =
1161
77
                        pos <= 0 || pos > num_children ||
1162
77
                        block.get_by_position(arguments[pos]).column->get_data_at(i).data ==
1163
27
                                nullptr;
1164
77
                if (res_null_map[i]) {
1165
50
                    res->insert_default();
1166
50
                } else {
1167
27
                    auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i);
1168
27
                    res->insert_data(insert_data.data, insert_data.size);
1169
27
                }
1170
77
            }
1171
61
            block.get_by_position(result).column =
1172
61
                    ColumnNullable::create(std::move(res), std::move(null_map));
1173
61
        }
1174
434
        return Status::OK();
1175
434
    }
1176
};
1177
1178
// concat_ws (string,string....) or (string, Array)
1179
// TODO: avoid using fmtlib
1180
class FunctionStringConcatWs : public IFunction {
1181
public:
1182
    using Chars = ColumnString::Chars;
1183
    using Offsets = ColumnString::Offsets;
1184
1185
    static constexpr auto name = "concat_ws";
1186
535
    static FunctionPtr create() { return std::make_shared<FunctionStringConcatWs>(); }
1187
0
    String get_name() const override { return name; }
1188
0
    size_t get_number_of_arguments() const override { return 0; }
1189
527
    bool is_variadic() const override { return true; }
1190
1191
526
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1192
526
        const IDataType* first_type = arguments[0].get();
1193
526
        if (first_type->is_nullable()) {
1194
459
            return make_nullable(std::make_shared<DataTypeString>());
1195
459
        } else {
1196
67
            return std::make_shared<DataTypeString>();
1197
67
        }
1198
526
    }
1199
1.10k
    bool use_default_implementation_for_nulls() const override { return false; }
1200
1201
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1202
576
                        uint32_t result, size_t input_rows_count) const override {
1203
576
        DCHECK_GE(arguments.size(), 2);
1204
576
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1205
        // we create a zero column to simply implement
1206
576
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
1207
576
        auto res = ColumnString::create();
1208
576
        bool is_null_type = block.get_by_position(arguments[0]).type.get()->is_nullable();
1209
576
        size_t argument_size = arguments.size();
1210
576
        std::vector<const Offsets*> offsets_list(argument_size);
1211
576
        std::vector<const Chars*> chars_list(argument_size);
1212
576
        std::vector<const ColumnUInt8::Container*> null_list(argument_size);
1213
1214
576
        std::vector<ColumnPtr> argument_columns(argument_size);
1215
576
        std::vector<ColumnPtr> argument_null_columns(argument_size);
1216
1217
1.96k
        for (size_t i = 0; i < argument_size; ++i) {
1218
1.38k
            argument_columns[i] =
1219
1.38k
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
1220
1.38k
            if (const auto* nullable =
1221
1.38k
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
1222
                // Danger: Here must dispose the null map data first! Because
1223
                // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
1224
                // of column nullable mem of null map
1225
1.14k
                null_list[i] = &nullable->get_null_map_data();
1226
1.14k
                argument_null_columns[i] = nullable->get_null_map_column_ptr();
1227
1.14k
                argument_columns[i] = nullable->get_nested_column_ptr();
1228
1.14k
            } else {
1229
245
                null_list[i] = &const_null_map->get_data();
1230
245
            }
1231
1232
1.38k
            if (is_column<ColumnArray>(argument_columns[i].get())) {
1233
121
                continue;
1234
121
            }
1235
1236
1.26k
            const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
1237
1.26k
            offsets_list[i] = &col_str->get_offsets();
1238
1.26k
            chars_list[i] = &col_str->get_chars();
1239
1.26k
        }
1240
1241
576
        auto& res_data = res->get_chars();
1242
576
        auto& res_offset = res->get_offsets();
1243
576
        res_offset.resize(input_rows_count);
1244
1245
576
        VectorizedUtils::update_null_map(null_map->get_data(), *null_list[0]);
1246
576
        fmt::memory_buffer buffer;
1247
576
        std::vector<std::string_view> views;
1248
1249
576
        if (is_column<ColumnArray>(argument_columns[1].get())) {
1250
            // Determine if the nested type of the array is String
1251
121
            const auto& array_column = reinterpret_cast<const ColumnArray&>(*argument_columns[1]);
1252
121
            if (!array_column.get_data().is_column_string()) {
1253
0
                return Status::NotSupported(
1254
0
                        fmt::format("unsupported nested array of type {} for function {}",
1255
0
                                    is_column_nullable(array_column.get_data())
1256
0
                                            ? array_column.get_data().get_name()
1257
0
                                            : array_column.get_data().get_name(),
1258
0
                                    get_name()));
1259
0
            }
1260
            // Concat string in array
1261
121
            _execute_array(input_rows_count, array_column, buffer, views, offsets_list, chars_list,
1262
121
                           null_list, res_data, res_offset);
1263
1264
455
        } else {
1265
            // Concat string
1266
455
            _execute_string(input_rows_count, argument_size, buffer, views, offsets_list,
1267
455
                            chars_list, null_list, res_data, res_offset);
1268
455
        }
1269
576
        if (is_null_type) {
1270
459
            block.get_by_position(result).column =
1271
459
                    ColumnNullable::create(std::move(res), std::move(null_map));
1272
459
        } else {
1273
117
            block.get_by_position(result).column = std::move(res);
1274
117
        }
1275
576
        return Status::OK();
1276
576
    }
1277
1278
private:
1279
    void _execute_array(const size_t& input_rows_count, const ColumnArray& array_column,
1280
                        fmt::memory_buffer& buffer, std::vector<std::string_view>& views,
1281
                        const std::vector<const Offsets*>& offsets_list,
1282
                        const std::vector<const Chars*>& chars_list,
1283
                        const std::vector<const ColumnUInt8::Container*>& null_list,
1284
121
                        Chars& res_data, Offsets& res_offset) const {
1285
        // Get array nested column
1286
121
        const UInt8* array_nested_null_map = nullptr;
1287
121
        ColumnPtr array_nested_column = nullptr;
1288
1289
121
        if (is_column_nullable(array_column.get_data())) {
1290
121
            const auto& array_nested_null_column =
1291
121
                    reinterpret_cast<const ColumnNullable&>(array_column.get_data());
1292
            // String's null map in array
1293
121
            array_nested_null_map =
1294
121
                    array_nested_null_column.get_null_map_column().get_data().data();
1295
121
            array_nested_column = array_nested_null_column.get_nested_column_ptr();
1296
121
        } else {
1297
0
            array_nested_column = array_column.get_data_ptr();
1298
0
        }
1299
1300
121
        const auto& string_column = reinterpret_cast<const ColumnString&>(*array_nested_column);
1301
121
        const Chars& string_src_chars = string_column.get_chars();
1302
121
        const auto& src_string_offsets = string_column.get_offsets();
1303
121
        const auto& src_array_offsets = array_column.get_offsets();
1304
121
        size_t current_src_array_offset = 0;
1305
1306
        // Concat string in array
1307
613
        for (size_t i = 0; i < input_rows_count; ++i) {
1308
492
            auto& sep_offsets = *offsets_list[0];
1309
492
            auto& sep_chars = *chars_list[0];
1310
492
            auto& sep_nullmap = *null_list[0];
1311
1312
492
            if (sep_nullmap[i]) {
1313
11
                res_offset[i] = res_data.size();
1314
11
                current_src_array_offset += src_array_offsets[i] - src_array_offsets[i - 1];
1315
11
                continue;
1316
11
            }
1317
1318
481
            int sep_size = sep_offsets[i] - sep_offsets[i - 1];
1319
481
            const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]);
1320
1321
481
            std::string_view sep(sep_data, sep_size);
1322
481
            buffer.clear();
1323
481
            views.clear();
1324
1325
481
            for (auto next_src_array_offset = src_array_offsets[i];
1326
1.25k
                 current_src_array_offset < next_src_array_offset; ++current_src_array_offset) {
1327
776
                const auto current_src_string_offset =
1328
776
                        current_src_array_offset ? src_string_offsets[current_src_array_offset - 1]
1329
776
                                                 : 0;
1330
776
                size_t bytes_to_copy =
1331
776
                        src_string_offsets[current_src_array_offset] - current_src_string_offset;
1332
776
                const char* ptr =
1333
776
                        reinterpret_cast<const char*>(&string_src_chars[current_src_string_offset]);
1334
1335
776
                if (array_nested_null_map == nullptr ||
1336
776
                    !array_nested_null_map[current_src_array_offset]) {
1337
751
                    views.emplace_back(ptr, bytes_to_copy);
1338
751
                }
1339
776
            }
1340
1341
481
            fmt::format_to(buffer, "{}", fmt::join(views, sep));
1342
1343
481
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1344
481
                                        res_offset);
1345
481
        }
1346
121
    }
1347
1348
    void _execute_string(const size_t& input_rows_count, const size_t& argument_size,
1349
                         fmt::memory_buffer& buffer, std::vector<std::string_view>& views,
1350
                         const std::vector<const Offsets*>& offsets_list,
1351
                         const std::vector<const Chars*>& chars_list,
1352
                         const std::vector<const ColumnUInt8::Container*>& null_list,
1353
455
                         Chars& res_data, Offsets& res_offset) const {
1354
        // Concat string
1355
1.05k
        for (size_t i = 0; i < input_rows_count; ++i) {
1356
596
            auto& sep_offsets = *offsets_list[0];
1357
596
            auto& sep_chars = *chars_list[0];
1358
596
            auto& sep_nullmap = *null_list[0];
1359
596
            if (sep_nullmap[i]) {
1360
79
                res_offset[i] = res_data.size();
1361
79
                continue;
1362
79
            }
1363
1364
517
            int sep_size = sep_offsets[i] - sep_offsets[i - 1];
1365
517
            const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]);
1366
1367
517
            std::string_view sep(sep_data, sep_size);
1368
517
            buffer.clear();
1369
517
            views.clear();
1370
1.22k
            for (size_t j = 1; j < argument_size; ++j) {
1371
707
                auto& current_offsets = *offsets_list[j];
1372
707
                auto& current_chars = *chars_list[j];
1373
707
                auto& current_nullmap = *null_list[j];
1374
707
                int size = current_offsets[i] - current_offsets[i - 1];
1375
707
                const char* ptr =
1376
707
                        reinterpret_cast<const char*>(&current_chars[current_offsets[i - 1]]);
1377
707
                if (!current_nullmap[i]) {
1378
630
                    views.emplace_back(ptr, size);
1379
630
                }
1380
707
            }
1381
517
            fmt::format_to(buffer, "{}", fmt::join(views, sep));
1382
517
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1383
517
                                        res_offset);
1384
517
        }
1385
455
    }
1386
};
1387
1388
class FunctionStringRepeat : public IFunction {
1389
public:
1390
    static constexpr auto name = "repeat";
1391
209
    static FunctionPtr create() { return std::make_shared<FunctionStringRepeat>(); }
1392
1
    String get_name() const override { return name; }
1393
200
    size_t get_number_of_arguments() const override { return 2; }
1394
    // should set NULL value of nested data to default,
1395
    // as iff it's not inited and invalid, the repeat result of length is so large cause overflow
1396
235
    bool need_replace_null_data_to_default() const override { return true; }
1397
1398
200
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1399
200
        return make_nullable(std::make_shared<DataTypeString>());
1400
200
    }
1401
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1402
248
                        uint32_t result, size_t input_rows_count) const override {
1403
248
        DCHECK_EQ(arguments.size(), 2);
1404
248
        auto res = ColumnString::create();
1405
248
        auto null_map = ColumnUInt8::create();
1406
1407
248
        ColumnPtr argument_ptr[2];
1408
248
        argument_ptr[0] =
1409
248
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1410
248
        argument_ptr[1] = block.get_by_position(arguments[1]).column;
1411
1412
248
        if (const auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) {
1413
248
            if (const auto* col2 = check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
1414
130
                RETURN_IF_ERROR(vector_vector(col1->get_chars(), col1->get_offsets(),
1415
130
                                              col2->get_data(), res->get_chars(),
1416
130
                                              res->get_offsets(), null_map->get_data()));
1417
130
                block.replace_by_position(
1418
130
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1419
130
                return Status::OK();
1420
130
            } else if (const auto* col2_const =
1421
118
                               check_and_get_column<ColumnConst>(*argument_ptr[1])) {
1422
118
                DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column()));
1423
118
                int repeat = col2_const->get_int(0);
1424
118
                if (repeat <= 0) {
1425
18
                    null_map->get_data().resize_fill(input_rows_count, 0);
1426
18
                    res->insert_many_defaults(input_rows_count);
1427
100
                } else {
1428
100
                    vector_const(col1->get_chars(), col1->get_offsets(), repeat, res->get_chars(),
1429
100
                                 res->get_offsets(), null_map->get_data());
1430
100
                }
1431
118
                block.replace_by_position(
1432
118
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1433
118
                return Status::OK();
1434
118
            }
1435
248
        }
1436
1437
0
        return Status::RuntimeError("repeat function get error param: {}, {}",
1438
0
                                    argument_ptr[0]->get_name(), argument_ptr[1]->get_name());
1439
248
    }
1440
1441
    Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1442
                         const ColumnInt32::Container& repeats, ColumnString::Chars& res_data,
1443
                         ColumnString::Offsets& res_offsets,
1444
130
                         ColumnUInt8::Container& null_map) const {
1445
130
        size_t input_row_size = offsets.size();
1446
1447
130
        fmt::memory_buffer buffer;
1448
130
        res_offsets.resize(input_row_size);
1449
130
        null_map.resize_fill(input_row_size, 0);
1450
353
        for (ssize_t i = 0; i < input_row_size; ++i) {
1451
223
            buffer.clear();
1452
223
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1453
223
            size_t size = offsets[i] - offsets[i - 1];
1454
223
            int repeat = repeats[i];
1455
223
            if (repeat <= 0) {
1456
58
                StringOP::push_empty_string(i, res_data, res_offsets);
1457
165
            } else {
1458
165
                ColumnString::check_chars_length(repeat * size + res_data.size(), 0);
1459
132k
                for (int j = 0; j < repeat; ++j) {
1460
131k
                    buffer.append(raw_str, raw_str + size);
1461
131k
                }
1462
165
                StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i,
1463
165
                                            res_data, res_offsets);
1464
165
            }
1465
223
        }
1466
130
        return Status::OK();
1467
130
    }
1468
1469
    // TODO: 1. use pmr::vector<char> replace fmt_buffer may speed up the code
1470
    //       2. abstract the `vector_vector` and `vector_const`
1471
    //       3. rethink we should use `DEFAULT_MAX_STRING_SIZE` to bigger here
1472
    void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1473
                      int repeat, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
1474
100
                      ColumnUInt8::Container& null_map) const {
1475
100
        size_t input_row_size = offsets.size();
1476
1477
100
        fmt::memory_buffer buffer;
1478
100
        res_offsets.resize(input_row_size);
1479
100
        null_map.resize_fill(input_row_size, 0);
1480
4.28k
        for (ssize_t i = 0; i < input_row_size; ++i) {
1481
4.18k
            buffer.clear();
1482
4.18k
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1483
4.18k
            size_t size = offsets[i] - offsets[i - 1];
1484
4.18k
            ColumnString::check_chars_length(repeat * size + res_data.size(), 0);
1485
1486
44.6k
            for (int j = 0; j < repeat; ++j) {
1487
40.4k
                buffer.append(raw_str, raw_str + size);
1488
40.4k
            }
1489
4.18k
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1490
4.18k
                                        res_offsets);
1491
4.18k
        }
1492
100
    }
1493
};
1494
1495
template <typename Impl>
1496
class FunctionStringPad : public IFunction {
1497
public:
1498
    static constexpr auto name = Impl::name;
1499
1.56k
    static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }
_ZN5doris17FunctionStringPadINS_10StringLPadEE6createEv
Line
Count
Source
1499
793
    static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }
_ZN5doris17FunctionStringPadINS_10StringRPadEE6createEv
Line
Count
Source
1499
769
    static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }
1500
2
    String get_name() const override { return name; }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE8get_nameB5cxx11Ev
Line
Count
Source
1500
1
    String get_name() const override { return name; }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE8get_nameB5cxx11Ev
Line
Count
Source
1500
1
    String get_name() const override { return name; }
1501
1.54k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE23get_number_of_argumentsEv
Line
Count
Source
1501
784
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE23get_number_of_argumentsEv
Line
Count
Source
1501
760
    size_t get_number_of_arguments() const override { return 3; }
1502
1503
1.54k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1504
1.54k
        return make_nullable(std::make_shared<DataTypeString>());
1505
1.54k
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1503
784
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1504
784
        return make_nullable(std::make_shared<DataTypeString>());
1505
784
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1503
760
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1504
760
        return make_nullable(std::make_shared<DataTypeString>());
1505
760
    }
1506
1507
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1508
1.07k
                        uint32_t result, size_t input_rows_count) const override {
1509
1.07k
        DCHECK_GE(arguments.size(), 3);
1510
1.07k
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1511
        // we create a zero column to simply implement
1512
1.07k
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
1513
1.07k
        auto res = ColumnString::create();
1514
1515
1.07k
        ColumnPtr col[3];
1516
1.07k
        bool col_const[3];
1517
4.30k
        for (size_t i = 0; i < 3; ++i) {
1518
3.22k
            std::tie(col[i], col_const[i]) =
1519
3.22k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1520
3.22k
        }
1521
1.07k
        auto& null_map_data = null_map->get_data();
1522
1.07k
        auto& res_offsets = res->get_offsets();
1523
1.07k
        auto& res_chars = res->get_chars();
1524
1.07k
        res_offsets.resize(input_rows_count);
1525
1526
1.07k
        const auto* strcol = assert_cast<const ColumnString*>(col[0].get());
1527
1.07k
        const auto& strcol_offsets = strcol->get_offsets();
1528
1.07k
        const auto& strcol_chars = strcol->get_chars();
1529
1530
1.07k
        const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get());
1531
1.07k
        const auto& col_len_data = col_len->get_data();
1532
1533
1.07k
        const auto* padcol = assert_cast<const ColumnString*>(col[2].get());
1534
1.07k
        const auto& padcol_offsets = padcol->get_offsets();
1535
1.07k
        const auto& padcol_chars = padcol->get_chars();
1536
1.07k
        std::visit(
1537
1.07k
                [&](auto str_const, auto len_const, auto pad_const) {
1538
1.07k
                    execute_utf8<str_const, len_const, pad_const>(
1539
1.07k
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
1.07k
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
1.07k
                },
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
1537
167
                [&](auto str_const, auto len_const, auto pad_const) {
1538
167
                    execute_utf8<str_const, len_const, pad_const>(
1539
167
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
167
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
167
                },
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
1537
62
                [&](auto str_const, auto len_const, auto pad_const) {
1538
62
                    execute_utf8<str_const, len_const, pad_const>(
1539
62
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
62
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
62
                },
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
1537
62
                [&](auto str_const, auto len_const, auto pad_const) {
1538
62
                    execute_utf8<str_const, len_const, pad_const>(
1539
62
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
62
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
62
                },
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
1537
71
                [&](auto str_const, auto len_const, auto pad_const) {
1538
71
                    execute_utf8<str_const, len_const, pad_const>(
1539
71
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
71
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
71
                },
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
1537
62
                [&](auto str_const, auto len_const, auto pad_const) {
1538
62
                    execute_utf8<str_const, len_const, pad_const>(
1539
62
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
62
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
62
                },
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
1537
62
                [&](auto str_const, auto len_const, auto pad_const) {
1538
62
                    execute_utf8<str_const, len_const, pad_const>(
1539
62
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
62
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
62
                },
_ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
1537
62
                [&](auto str_const, auto len_const, auto pad_const) {
1538
62
                    execute_utf8<str_const, len_const, pad_const>(
1539
62
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
62
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
62
                },
Unexecuted instantiation: _ZZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
1537
153
                [&](auto str_const, auto len_const, auto pad_const) {
1538
153
                    execute_utf8<str_const, len_const, pad_const>(
1539
153
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
153
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
153
                },
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
1537
62
                [&](auto str_const, auto len_const, auto pad_const) {
1538
62
                    execute_utf8<str_const, len_const, pad_const>(
1539
62
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
62
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
62
                },
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
1537
62
                [&](auto str_const, auto len_const, auto pad_const) {
1538
62
                    execute_utf8<str_const, len_const, pad_const>(
1539
62
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
62
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
62
                },
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
1537
65
                [&](auto str_const, auto len_const, auto pad_const) {
1538
65
                    execute_utf8<str_const, len_const, pad_const>(
1539
65
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
65
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
65
                },
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
1537
62
                [&](auto str_const, auto len_const, auto pad_const) {
1538
62
                    execute_utf8<str_const, len_const, pad_const>(
1539
62
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
62
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
62
                },
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
1537
62
                [&](auto str_const, auto len_const, auto pad_const) {
1538
62
                    execute_utf8<str_const, len_const, pad_const>(
1539
62
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
62
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
62
                },
_ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
1537
62
                [&](auto str_const, auto len_const, auto pad_const) {
1538
62
                    execute_utf8<str_const, len_const, pad_const>(
1539
62
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
62
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
62
                },
Unexecuted instantiation: _ZZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
1542
1.07k
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
1543
1.07k
                make_bool_variant(col_const[2]));
1544
1545
1.07k
        block.get_by_position(result).column =
1546
1.07k
                ColumnNullable::create(std::move(res), std::move(null_map));
1547
1.07k
        return Status::OK();
1548
1.07k
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1508
548
                        uint32_t result, size_t input_rows_count) const override {
1509
548
        DCHECK_GE(arguments.size(), 3);
1510
548
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1511
        // we create a zero column to simply implement
1512
548
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
1513
548
        auto res = ColumnString::create();
1514
1515
548
        ColumnPtr col[3];
1516
548
        bool col_const[3];
1517
2.19k
        for (size_t i = 0; i < 3; ++i) {
1518
1.64k
            std::tie(col[i], col_const[i]) =
1519
1.64k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1520
1.64k
        }
1521
548
        auto& null_map_data = null_map->get_data();
1522
548
        auto& res_offsets = res->get_offsets();
1523
548
        auto& res_chars = res->get_chars();
1524
548
        res_offsets.resize(input_rows_count);
1525
1526
548
        const auto* strcol = assert_cast<const ColumnString*>(col[0].get());
1527
548
        const auto& strcol_offsets = strcol->get_offsets();
1528
548
        const auto& strcol_chars = strcol->get_chars();
1529
1530
548
        const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get());
1531
548
        const auto& col_len_data = col_len->get_data();
1532
1533
548
        const auto* padcol = assert_cast<const ColumnString*>(col[2].get());
1534
548
        const auto& padcol_offsets = padcol->get_offsets();
1535
548
        const auto& padcol_chars = padcol->get_chars();
1536
548
        std::visit(
1537
548
                [&](auto str_const, auto len_const, auto pad_const) {
1538
548
                    execute_utf8<str_const, len_const, pad_const>(
1539
548
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
548
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
548
                },
1542
548
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
1543
548
                make_bool_variant(col_const[2]));
1544
1545
548
        block.get_by_position(result).column =
1546
548
                ColumnNullable::create(std::move(res), std::move(null_map));
1547
548
        return Status::OK();
1548
548
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1508
528
                        uint32_t result, size_t input_rows_count) const override {
1509
528
        DCHECK_GE(arguments.size(), 3);
1510
528
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1511
        // we create a zero column to simply implement
1512
528
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
1513
528
        auto res = ColumnString::create();
1514
1515
528
        ColumnPtr col[3];
1516
528
        bool col_const[3];
1517
2.11k
        for (size_t i = 0; i < 3; ++i) {
1518
1.58k
            std::tie(col[i], col_const[i]) =
1519
1.58k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1520
1.58k
        }
1521
528
        auto& null_map_data = null_map->get_data();
1522
528
        auto& res_offsets = res->get_offsets();
1523
528
        auto& res_chars = res->get_chars();
1524
528
        res_offsets.resize(input_rows_count);
1525
1526
528
        const auto* strcol = assert_cast<const ColumnString*>(col[0].get());
1527
528
        const auto& strcol_offsets = strcol->get_offsets();
1528
528
        const auto& strcol_chars = strcol->get_chars();
1529
1530
528
        const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get());
1531
528
        const auto& col_len_data = col_len->get_data();
1532
1533
528
        const auto* padcol = assert_cast<const ColumnString*>(col[2].get());
1534
528
        const auto& padcol_offsets = padcol->get_offsets();
1535
528
        const auto& padcol_chars = padcol->get_chars();
1536
528
        std::visit(
1537
528
                [&](auto str_const, auto len_const, auto pad_const) {
1538
528
                    execute_utf8<str_const, len_const, pad_const>(
1539
528
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
528
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
528
                },
1542
528
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
1543
528
                make_bool_variant(col_const[2]));
1544
1545
528
        block.get_by_position(result).column =
1546
528
                ColumnNullable::create(std::move(res), std::move(null_map));
1547
528
        return Status::OK();
1548
528
    }
1549
1550
    template <bool str_const, bool len_const, bool pad_const>
1551
    void execute_utf8(const ColumnString::Offsets& strcol_offsets,
1552
                      const ColumnString::Chars& strcol_chars,
1553
                      const ColumnInt32::Container& col_len_data,
1554
                      const ColumnString::Offsets& padcol_offsets,
1555
                      const ColumnString::Chars& padcol_chars, ColumnString::Offsets& res_offsets,
1556
                      ColumnString::Chars& res_chars, ColumnUInt8::Container& null_map_data,
1557
1.07k
                      size_t input_rows_count) const {
1558
1.07k
        std::vector<size_t> pad_index;
1559
1.07k
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
1.07k
        if constexpr (pad_const) {
1563
384
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
384
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
384
        }
1566
1567
1.07k
        fmt::memory_buffer buffer;
1568
1.07k
        buffer.resize(strcol_chars.size());
1569
1.07k
        size_t buffer_len = 0;
1570
1571
4.42k
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
3.35k
            if constexpr (!pad_const) {
1573
953
                pad_index.clear();
1574
953
            }
1575
3.35k
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
3.35k
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
562
                null_map_data[i] = true;
1579
562
                res_offsets[i] = buffer_len;
1580
2.78k
            } else {
1581
2.78k
                const auto str_idx = index_check_const<str_const>(i);
1582
2.78k
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
2.78k
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
2.78k
                const auto pad_idx = index_check_const<pad_const>(i);
1585
2.78k
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
2.78k
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
2.78k
                auto [iterate_byte_len, iterate_char_len] =
1589
2.78k
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
2.78k
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
2.78k
                if (iterate_char_len == len) {
1593
599
                    buffer.resize(buffer_len + iterate_byte_len);
1594
599
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
599
                    buffer_len += iterate_byte_len;
1596
599
                    res_offsets[i] = buffer_len;
1597
599
                    continue;
1598
599
                }
1599
2.19k
                size_t pad_char_size;
1600
2.19k
                if constexpr (!pad_const) {
1601
154
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
154
                                                                         pad_len, pad_index);
1603
2.03k
                } else {
1604
2.03k
                    pad_char_size = const_pad_char_size;
1605
2.03k
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2.19k
                if (pad_char_size == 0) {
1609
18
                    res_offsets[i] = buffer_len;
1610
18
                    continue;
1611
18
                }
1612
2.17k
                const size_t str_char_size = iterate_char_len;
1613
2.17k
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2.17k
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2.17k
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2.17k
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2.17k
                buffer.resize(buffer_len + new_capacity);
1618
2.17k
                if constexpr (!Impl::is_lpad) {
1619
2.08k
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2.08k
                    buffer_len += str_len;
1621
2.08k
                }
1622
                // Prepend chars of pad.
1623
2.17k
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2.17k
                                      pad_times);
1625
2.17k
                buffer_len += pad_times * pad_len;
1626
1627
2.17k
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2.17k
                buffer_len += pad_remainder_len;
1629
1630
2.17k
                if constexpr (Impl::is_lpad) {
1631
87
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
87
                    buffer_len += str_len;
1633
87
                }
1634
2.17k
                res_offsets[i] = buffer_len;
1635
2.17k
            }
1636
3.35k
        }
1637
1.07k
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
1.07k
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
167
                      size_t input_rows_count) const {
1558
167
        std::vector<size_t> pad_index;
1559
167
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
167
        fmt::memory_buffer buffer;
1568
167
        buffer.resize(strcol_chars.size());
1569
167
        size_t buffer_len = 0;
1570
1571
465
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
298
            if constexpr (!pad_const) {
1573
298
                pad_index.clear();
1574
298
            }
1575
298
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
298
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
89
                null_map_data[i] = true;
1579
89
                res_offsets[i] = buffer_len;
1580
209
            } else {
1581
209
                const auto str_idx = index_check_const<str_const>(i);
1582
209
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
209
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
209
                const auto pad_idx = index_check_const<pad_const>(i);
1585
209
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
209
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
209
                auto [iterate_byte_len, iterate_char_len] =
1589
209
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
209
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
209
                if (iterate_char_len == len) {
1593
136
                    buffer.resize(buffer_len + iterate_byte_len);
1594
136
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
136
                    buffer_len += iterate_byte_len;
1596
136
                    res_offsets[i] = buffer_len;
1597
136
                    continue;
1598
136
                }
1599
73
                size_t pad_char_size;
1600
73
                if constexpr (!pad_const) {
1601
73
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
73
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
73
                if (pad_char_size == 0) {
1609
8
                    res_offsets[i] = buffer_len;
1610
8
                    continue;
1611
8
                }
1612
65
                const size_t str_char_size = iterate_char_len;
1613
65
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
65
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
65
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
65
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
65
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
65
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
65
                                      pad_times);
1625
65
                buffer_len += pad_times * pad_len;
1626
1627
65
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
65
                buffer_len += pad_remainder_len;
1629
1630
65
                if constexpr (Impl::is_lpad) {
1631
65
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
65
                    buffer_len += str_len;
1633
65
                }
1634
65
                res_offsets[i] = buffer_len;
1635
65
            }
1636
298
        }
1637
167
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
167
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
71
                      size_t input_rows_count) const {
1558
71
        std::vector<size_t> pad_index;
1559
71
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
71
        if constexpr (pad_const) {
1563
71
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
71
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
71
        }
1566
1567
71
        fmt::memory_buffer buffer;
1568
71
        buffer.resize(strcol_chars.size());
1569
71
        size_t buffer_len = 0;
1570
1571
144
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
73
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
73
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
41
            } else {
1581
41
                const auto str_idx = index_check_const<str_const>(i);
1582
41
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
41
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
41
                const auto pad_idx = index_check_const<pad_const>(i);
1585
41
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
41
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
41
                auto [iterate_byte_len, iterate_char_len] =
1589
41
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
41
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
41
                if (iterate_char_len == len) {
1593
29
                    buffer.resize(buffer_len + iterate_byte_len);
1594
29
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
29
                    buffer_len += iterate_byte_len;
1596
29
                    res_offsets[i] = buffer_len;
1597
29
                    continue;
1598
29
                }
1599
12
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
12
                } else {
1604
12
                    pad_char_size = const_pad_char_size;
1605
12
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
12
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
12
                const size_t str_char_size = iterate_char_len;
1613
12
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
12
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
12
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
12
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
12
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
12
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
12
                                      pad_times);
1625
12
                buffer_len += pad_times * pad_len;
1626
1627
12
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
12
                buffer_len += pad_remainder_len;
1629
1630
12
                if constexpr (Impl::is_lpad) {
1631
12
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
12
                    buffer_len += str_len;
1633
12
                }
1634
12
                res_offsets[i] = buffer_len;
1635
12
            }
1636
73
        }
1637
71
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
71
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
Unexecuted instantiation: _ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
153
                      size_t input_rows_count) const {
1558
153
        std::vector<size_t> pad_index;
1559
153
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
153
        fmt::memory_buffer buffer;
1568
153
        buffer.resize(strcol_chars.size());
1569
153
        size_t buffer_len = 0;
1570
1571
436
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
283
            if constexpr (!pad_const) {
1573
283
                pad_index.clear();
1574
283
            }
1575
283
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
283
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
89
                null_map_data[i] = true;
1579
89
                res_offsets[i] = buffer_len;
1580
194
            } else {
1581
194
                const auto str_idx = index_check_const<str_const>(i);
1582
194
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
194
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
194
                const auto pad_idx = index_check_const<pad_const>(i);
1585
194
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
194
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
194
                auto [iterate_byte_len, iterate_char_len] =
1589
194
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
194
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
194
                if (iterate_char_len == len) {
1593
125
                    buffer.resize(buffer_len + iterate_byte_len);
1594
125
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
125
                    buffer_len += iterate_byte_len;
1596
125
                    res_offsets[i] = buffer_len;
1597
125
                    continue;
1598
125
                }
1599
69
                size_t pad_char_size;
1600
69
                if constexpr (!pad_const) {
1601
69
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
69
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
69
                if (pad_char_size == 0) {
1609
10
                    res_offsets[i] = buffer_len;
1610
10
                    continue;
1611
10
                }
1612
59
                const size_t str_char_size = iterate_char_len;
1613
59
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
59
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
59
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
59
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
59
                buffer.resize(buffer_len + new_capacity);
1618
59
                if constexpr (!Impl::is_lpad) {
1619
59
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
59
                    buffer_len += str_len;
1621
59
                }
1622
                // Prepend chars of pad.
1623
59
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
59
                                      pad_times);
1625
59
                buffer_len += pad_times * pad_len;
1626
1627
59
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
59
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
59
                res_offsets[i] = buffer_len;
1635
59
            }
1636
283
        }
1637
153
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
153
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
65
                      size_t input_rows_count) const {
1558
65
        std::vector<size_t> pad_index;
1559
65
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
65
        if constexpr (pad_const) {
1563
65
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
65
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
65
        }
1566
1567
65
        fmt::memory_buffer buffer;
1568
65
        buffer.resize(strcol_chars.size());
1569
65
        size_t buffer_len = 0;
1570
1571
2.14k
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
2.07k
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
2.07k
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
2.04k
            } else {
1581
2.04k
                const auto str_idx = index_check_const<str_const>(i);
1582
2.04k
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
2.04k
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
2.04k
                const auto pad_idx = index_check_const<pad_const>(i);
1585
2.04k
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
2.04k
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
2.04k
                auto [iterate_byte_len, iterate_char_len] =
1589
2.04k
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
2.04k
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
2.04k
                if (iterate_char_len == len) {
1593
29
                    buffer.resize(buffer_len + iterate_byte_len);
1594
29
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
29
                    buffer_len += iterate_byte_len;
1596
29
                    res_offsets[i] = buffer_len;
1597
29
                    continue;
1598
29
                }
1599
2.01k
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2.01k
                } else {
1604
2.01k
                    pad_char_size = const_pad_char_size;
1605
2.01k
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2.01k
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2.01k
                const size_t str_char_size = iterate_char_len;
1613
2.01k
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2.01k
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2.01k
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2.01k
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2.01k
                buffer.resize(buffer_len + new_capacity);
1618
2.01k
                if constexpr (!Impl::is_lpad) {
1619
2.01k
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2.01k
                    buffer_len += str_len;
1621
2.01k
                }
1622
                // Prepend chars of pad.
1623
2.01k
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2.01k
                                      pad_times);
1625
2.01k
                buffer_len += pad_times * pad_len;
1626
1627
2.01k
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2.01k
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2.01k
                res_offsets[i] = buffer_len;
1635
2.01k
            }
1636
2.07k
        }
1637
65
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
65
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
Unexecuted instantiation: _ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
1639
};
1640
1641
template <typename Impl>
1642
class FunctionStringFormatRound : public IFunction {
1643
public:
1644
    static constexpr auto name = "format_round";
1645
164
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE6createEv
Line
Count
Source
1645
41
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE6createEv
Line
Count
Source
1645
16
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE6createEv
Line
Count
Source
1645
11
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE6createEv
Line
Count
Source
1645
10
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE6createEv
Line
Count
Source
1645
36
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE6createEv
Line
Count
Source
1645
29
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE6createEv
Line
Count
Source
1645
12
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE6createEv
Line
Count
Source
1645
9
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
1646
8
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
1647
1648
92
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
92
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
92
        return std::make_shared<DataTypeString>();
1654
92
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1648
32
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
32
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
32
        return std::make_shared<DataTypeString>();
1654
32
    }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1648
7
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
7
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
7
        return std::make_shared<DataTypeString>();
1654
7
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1648
2
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
2
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
2
        return std::make_shared<DataTypeString>();
1654
2
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
1648
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
1
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
1
        return std::make_shared<DataTypeString>();
1654
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
1648
27
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
27
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
27
        return std::make_shared<DataTypeString>();
1654
27
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
1648
20
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
20
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
20
        return std::make_shared<DataTypeString>();
1654
20
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
1648
3
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
3
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
3
        return std::make_shared<DataTypeString>();
1654
3
    }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
1655
64
    DataTypes get_variadic_argument_types_impl() const override {
1656
64
        return Impl::get_variadic_argument_types();
1657
64
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
8
    DataTypes get_variadic_argument_types_impl() const override {
1656
8
        return Impl::get_variadic_argument_types();
1657
8
    }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
8
    DataTypes get_variadic_argument_types_impl() const override {
1656
8
        return Impl::get_variadic_argument_types();
1657
8
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
8
    DataTypes get_variadic_argument_types_impl() const override {
1656
8
        return Impl::get_variadic_argument_types();
1657
8
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
8
    DataTypes get_variadic_argument_types_impl() const override {
1656
8
        return Impl::get_variadic_argument_types();
1657
8
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
8
    DataTypes get_variadic_argument_types_impl() const override {
1656
8
        return Impl::get_variadic_argument_types();
1657
8
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
8
    DataTypes get_variadic_argument_types_impl() const override {
1656
8
        return Impl::get_variadic_argument_types();
1657
8
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
8
    DataTypes get_variadic_argument_types_impl() const override {
1656
8
        return Impl::get_variadic_argument_types();
1657
8
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
8
    DataTypes get_variadic_argument_types_impl() const override {
1656
8
        return Impl::get_variadic_argument_types();
1657
8
    }
1658
92
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE23get_number_of_argumentsEv
Line
Count
Source
1658
32
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE23get_number_of_argumentsEv
Line
Count
Source
1658
7
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE23get_number_of_argumentsEv
Line
Count
Source
1658
2
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE23get_number_of_argumentsEv
Line
Count
Source
1658
1
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE23get_number_of_argumentsEv
Line
Count
Source
1658
27
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE23get_number_of_argumentsEv
Line
Count
Source
1658
20
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE23get_number_of_argumentsEv
Line
Count
Source
1658
3
    size_t get_number_of_arguments() const override { return 2; }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE23get_number_of_argumentsEv
1659
1660
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1661
94
                        uint32_t result, size_t input_rows_count) const override {
1662
94
        auto res_column = ColumnString::create();
1663
94
        ColumnPtr argument_column =
1664
94
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
94
        ColumnPtr argument_column_2;
1666
94
        bool is_const;
1667
94
        std::tie(argument_column_2, is_const) =
1668
94
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
94
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
94
        if (is_const) {
1672
12
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
12
                                                         argument_column_2, input_rows_count));
1674
82
        } else {
1675
82
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
82
                                                          argument_column_2, input_rows_count));
1677
82
        }
1678
1679
92
        block.replace_by_position(result, std::move(res_column));
1680
92
        return Status::OK();
1681
94
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
33
                        uint32_t result, size_t input_rows_count) const override {
1662
33
        auto res_column = ColumnString::create();
1663
33
        ColumnPtr argument_column =
1664
33
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
33
        ColumnPtr argument_column_2;
1666
33
        bool is_const;
1667
33
        std::tie(argument_column_2, is_const) =
1668
33
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
33
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
33
        if (is_const) {
1672
2
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
2
                                                         argument_column_2, input_rows_count));
1674
31
        } else {
1675
31
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
31
                                                          argument_column_2, input_rows_count));
1677
31
        }
1678
1679
33
        block.replace_by_position(result, std::move(res_column));
1680
33
        return Status::OK();
1681
33
    }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
9
                        uint32_t result, size_t input_rows_count) const override {
1662
9
        auto res_column = ColumnString::create();
1663
9
        ColumnPtr argument_column =
1664
9
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
9
        ColumnPtr argument_column_2;
1666
9
        bool is_const;
1667
9
        std::tie(argument_column_2, is_const) =
1668
9
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
9
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
9
        if (is_const) {
1672
4
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
4
                                                         argument_column_2, input_rows_count));
1674
5
        } else {
1675
5
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
5
                                                          argument_column_2, input_rows_count));
1677
5
        }
1678
1679
9
        block.replace_by_position(result, std::move(res_column));
1680
9
        return Status::OK();
1681
9
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
3
                        uint32_t result, size_t input_rows_count) const override {
1662
3
        auto res_column = ColumnString::create();
1663
3
        ColumnPtr argument_column =
1664
3
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
3
        ColumnPtr argument_column_2;
1666
3
        bool is_const;
1667
3
        std::tie(argument_column_2, is_const) =
1668
3
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
3
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
3
        if (is_const) {
1672
2
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
2
                                                         argument_column_2, input_rows_count));
1674
2
        } else {
1675
1
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
1
                                                          argument_column_2, input_rows_count));
1677
1
        }
1678
1679
3
        block.replace_by_position(result, std::move(res_column));
1680
3
        return Status::OK();
1681
3
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
1
                        uint32_t result, size_t input_rows_count) const override {
1662
1
        auto res_column = ColumnString::create();
1663
1
        ColumnPtr argument_column =
1664
1
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
1
        ColumnPtr argument_column_2;
1666
1
        bool is_const;
1667
1
        std::tie(argument_column_2, is_const) =
1668
1
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
1
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
1
        if (is_const) {
1672
0
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
0
                                                         argument_column_2, input_rows_count));
1674
1
        } else {
1675
1
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
1
                                                          argument_column_2, input_rows_count));
1677
1
        }
1678
1679
1
        block.replace_by_position(result, std::move(res_column));
1680
1
        return Status::OK();
1681
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
27
                        uint32_t result, size_t input_rows_count) const override {
1662
27
        auto res_column = ColumnString::create();
1663
27
        ColumnPtr argument_column =
1664
27
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
27
        ColumnPtr argument_column_2;
1666
27
        bool is_const;
1667
27
        std::tie(argument_column_2, is_const) =
1668
27
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
27
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
27
        if (is_const) {
1672
0
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
0
                                                         argument_column_2, input_rows_count));
1674
27
        } else {
1675
27
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
27
                                                          argument_column_2, input_rows_count));
1677
27
        }
1678
1679
27
        block.replace_by_position(result, std::move(res_column));
1680
27
        return Status::OK();
1681
27
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
17
                        uint32_t result, size_t input_rows_count) const override {
1662
17
        auto res_column = ColumnString::create();
1663
17
        ColumnPtr argument_column =
1664
17
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
17
        ColumnPtr argument_column_2;
1666
17
        bool is_const;
1667
17
        std::tie(argument_column_2, is_const) =
1668
17
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
17
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
17
        if (is_const) {
1672
2
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
2
                                                         argument_column_2, input_rows_count));
1674
15
        } else {
1675
15
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
15
                                                          argument_column_2, input_rows_count));
1677
15
        }
1678
1679
15
        block.replace_by_position(result, std::move(res_column));
1680
15
        return Status::OK();
1681
17
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
4
                        uint32_t result, size_t input_rows_count) const override {
1662
4
        auto res_column = ColumnString::create();
1663
4
        ColumnPtr argument_column =
1664
4
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
4
        ColumnPtr argument_column_2;
1666
4
        bool is_const;
1667
4
        std::tie(argument_column_2, is_const) =
1668
4
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
4
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
4
        if (is_const) {
1672
2
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
2
                                                         argument_column_2, input_rows_count));
1674
2
        } else {
1675
2
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
2
                                                          argument_column_2, input_rows_count));
1677
2
        }
1678
1679
4
        block.replace_by_position(result, std::move(res_column));
1680
4
        return Status::OK();
1681
4
    }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
1682
};
1683
1684
class FunctionSplitPart : public IFunction {
1685
public:
1686
    static constexpr auto name = "split_part";
1687
32
    static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); }
1688
1
    String get_name() const override { return name; }
1689
23
    size_t get_number_of_arguments() const override { return 3; }
1690
1691
23
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1692
23
        return make_nullable(std::make_shared<DataTypeString>());
1693
23
    }
1694
1695
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1696
45
                        uint32_t result, size_t input_rows_count) const override {
1697
45
        DCHECK_EQ(arguments.size(), 3);
1698
1699
45
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1700
        // Create a zero column to simply implement
1701
45
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
1702
45
        auto res = ColumnString::create();
1703
1704
45
        auto& null_map_data = null_map->get_data();
1705
45
        auto& res_offsets = res->get_offsets();
1706
45
        auto& res_chars = res->get_chars();
1707
45
        res_offsets.resize(input_rows_count);
1708
1709
45
        const size_t argument_size = arguments.size();
1710
45
        std::vector<ColumnPtr> argument_columns(argument_size);
1711
180
        for (size_t i = 0; i < argument_size; ++i) {
1712
135
            argument_columns[i] =
1713
135
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
1714
135
            if (const auto* nullable =
1715
135
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
1716
                // Danger: Here must dispose the null map data first! Because
1717
                // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
1718
                // of column nullable mem of null map
1719
0
                VectorizedUtils::update_null_map(null_map->get_data(),
1720
0
                                                 nullable->get_null_map_data());
1721
0
                argument_columns[i] = nullable->get_nested_column_ptr();
1722
0
            }
1723
135
        }
1724
1725
45
        const auto* str_col = assert_cast<const ColumnString*>(argument_columns[0].get());
1726
1727
45
        const auto* delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get());
1728
1729
45
        const auto* part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get());
1730
45
        const auto& part_num_col_data = part_num_col->get_data();
1731
1732
186
        for (size_t i = 0; i < input_rows_count; ++i) {
1733
141
            if (part_num_col_data[i] == 0) {
1734
0
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1735
0
                continue;
1736
0
            }
1737
1738
141
            auto delimiter = delimiter_col->get_data_at(i);
1739
141
            auto delimiter_str = delimiter_col->get_data_at(i).to_string();
1740
141
            auto part_number = part_num_col_data[i];
1741
141
            auto str = str_col->get_data_at(i);
1742
141
            if (delimiter.size == 0) {
1743
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
1744
0
                continue;
1745
0
            }
1746
1747
141
            if (part_number > 0) {
1748
131
                if (delimiter.size == 1) {
1749
                    // If delimiter is a char, use memchr to split
1750
127
                    int32_t pre_offset = -1;
1751
127
                    int32_t offset = -1;
1752
127
                    int32_t num = 0;
1753
187
                    while (num < part_number) {
1754
154
                        pre_offset = offset;
1755
154
                        size_t n = str.size - offset - 1;
1756
154
                        const char* pos = reinterpret_cast<const char*>(
1757
154
                                memchr(str.data + offset + 1, delimiter_str[0], n));
1758
154
                        if (pos != nullptr) {
1759
60
                            offset = pos - str.data;
1760
60
                            num++;
1761
94
                        } else {
1762
94
                            offset = str.size;
1763
94
                            num = (num == 0) ? 0 : num + 1;
1764
94
                            break;
1765
94
                        }
1766
154
                    }
1767
1768
127
                    if (num == part_number) {
1769
53
                        StringOP::push_value_string(
1770
53
                                std::string_view {
1771
53
                                        reinterpret_cast<const char*>(str.data + pre_offset + 1),
1772
53
                                        (size_t)offset - pre_offset - 1},
1773
53
                                i, res_chars, res_offsets);
1774
74
                    } else {
1775
74
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1776
74
                    }
1777
127
                } else {
1778
                    // If delimiter is a string, use memmem to split
1779
4
                    int32_t pre_offset = -delimiter.size;
1780
4
                    int32_t offset = -delimiter.size;
1781
4
                    int32_t num = 0;
1782
8
                    while (num < part_number) {
1783
7
                        pre_offset = offset;
1784
7
                        size_t n = str.size - offset - delimiter.size;
1785
7
                        char* pos =
1786
7
                                reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size,
1787
7
                                                               n, delimiter.data, delimiter.size));
1788
7
                        if (pos != nullptr) {
1789
4
                            offset = pos - str.data;
1790
4
                            num++;
1791
4
                        } else {
1792
3
                            offset = str.size;
1793
3
                            num = (num == 0) ? 0 : num + 1;
1794
3
                            break;
1795
3
                        }
1796
7
                    }
1797
1798
4
                    if (num == part_number) {
1799
2
                        StringOP::push_value_string(
1800
2
                                std::string_view {reinterpret_cast<const char*>(
1801
2
                                                          str.data + pre_offset + delimiter.size),
1802
2
                                                  (size_t)offset - pre_offset - delimiter.size},
1803
2
                                i, res_chars, res_offsets);
1804
2
                    } else {
1805
2
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1806
2
                    }
1807
4
                }
1808
131
            } else {
1809
10
                part_number = -part_number;
1810
10
                auto str_str = str.to_string();
1811
10
                int32_t offset = str.size;
1812
10
                int32_t pre_offset = offset;
1813
10
                int32_t num = 0;
1814
10
                auto substr = str_str;
1815
20
                while (num <= part_number && offset >= 0) {
1816
20
                    offset = (int)substr.rfind(delimiter, offset);
1817
20
                    if (offset != -1) {
1818
17
                        if (++num == part_number) {
1819
7
                            break;
1820
7
                        }
1821
10
                        pre_offset = offset;
1822
10
                        offset = offset - 1;
1823
10
                        substr = str_str.substr(0, pre_offset);
1824
10
                    } else {
1825
3
                        break;
1826
3
                    }
1827
20
                }
1828
10
                num = (offset == -1 && num != 0) ? num + 1 : num;
1829
1830
10
                if (num == part_number) {
1831
8
                    if (offset == -1) {
1832
1
                        StringOP::push_value_string(
1833
1
                                std::string_view {reinterpret_cast<const char*>(str.data),
1834
1
                                                  (size_t)pre_offset},
1835
1
                                i, res_chars, res_offsets);
1836
7
                    } else {
1837
7
                        StringOP::push_value_string(
1838
7
                                std::string_view {str_str.substr(
1839
7
                                        offset + delimiter.size,
1840
7
                                        (size_t)pre_offset - offset - delimiter.size)},
1841
7
                                i, res_chars, res_offsets);
1842
7
                    }
1843
8
                } else {
1844
2
                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1845
2
                }
1846
10
            }
1847
141
        }
1848
1849
45
        block.get_by_position(result).column =
1850
45
                ColumnNullable::create(std::move(res), std::move(null_map));
1851
45
        return Status::OK();
1852
45
    }
1853
};
1854
1855
class FunctionSubstringIndex : public IFunction {
1856
public:
1857
    static constexpr auto name = "substring_index";
1858
22
    static FunctionPtr create() { return std::make_shared<FunctionSubstringIndex>(); }
1859
1
    String get_name() const override { return name; }
1860
13
    size_t get_number_of_arguments() const override { return 3; }
1861
1862
13
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1863
13
        return std::make_shared<DataTypeString>();
1864
13
    }
1865
1866
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1867
25
                        uint32_t result, size_t input_rows_count) const override {
1868
25
        DCHECK_EQ(arguments.size(), 3);
1869
1870
        // Create a zero column to simply implement
1871
25
        auto res = ColumnString::create();
1872
1873
25
        auto& res_offsets = res->get_offsets();
1874
25
        auto& res_chars = res->get_chars();
1875
25
        res_offsets.resize(input_rows_count);
1876
25
        ColumnPtr content_column;
1877
25
        bool content_const = false;
1878
25
        std::tie(content_column, content_const) =
1879
25
                unpack_if_const(block.get_by_position(arguments[0]).column);
1880
1881
25
        const auto* str_col = assert_cast<const ColumnString*>(content_column.get());
1882
1883
        // Handle both constant and non-constant delimiter parameters
1884
25
        ColumnPtr delimiter_column_ptr;
1885
25
        bool delimiter_const = false;
1886
25
        std::tie(delimiter_column_ptr, delimiter_const) =
1887
25
                unpack_if_const(block.get_by_position(arguments[1]).column);
1888
25
        const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get());
1889
1890
25
        ColumnPtr part_num_column_ptr;
1891
25
        bool part_num_const = false;
1892
25
        std::tie(part_num_column_ptr, part_num_const) =
1893
25
                unpack_if_const(block.get_by_position(arguments[2]).column);
1894
25
        const ColumnInt32* part_num_col =
1895
25
                assert_cast<const ColumnInt32*>(part_num_column_ptr.get());
1896
1897
        // For constant multi-character delimiters, create StringRef and StringSearch only once
1898
25
        std::optional<StringRef> const_delimiter_ref;
1899
25
        std::optional<StringSearch> const_search;
1900
25
        if (delimiter_const && delimiter_col->get_data_at(0).size > 1) {
1901
0
            const_delimiter_ref.emplace(delimiter_col->get_data_at(0));
1902
0
            const_search.emplace(&const_delimiter_ref.value());
1903
0
        }
1904
1905
133
        for (size_t i = 0; i < input_rows_count; ++i) {
1906
108
            auto str = str_col->get_data_at(content_const ? 0 : i);
1907
108
            auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i);
1908
108
            int32_t delimiter_size = delimiter.size;
1909
1910
108
            auto part_number = part_num_col->get_element(part_num_const ? 0 : i);
1911
1912
108
            if (part_number == 0 || delimiter_size == 0) {
1913
2
                StringOP::push_empty_string(i, res_chars, res_offsets);
1914
2
                continue;
1915
2
            }
1916
1917
106
            if (part_number > 0) {
1918
74
                if (delimiter_size == 1) {
1919
59
                    int32_t offset = -1;
1920
59
                    int32_t num = 0;
1921
71
                    while (num < part_number) {
1922
65
                        size_t n = str.size - offset - 1;
1923
65
                        const char* pos = reinterpret_cast<const char*>(
1924
65
                                memchr(str.data + offset + 1, delimiter.data[0], n));
1925
65
                        if (pos != nullptr) {
1926
12
                            offset = pos - str.data;
1927
12
                            num++;
1928
53
                        } else {
1929
53
                            offset = str.size;
1930
53
                            num = (num == 0) ? 0 : num + 1;
1931
53
                            break;
1932
53
                        }
1933
65
                    }
1934
1935
59
                    if (num == part_number) {
1936
6
                        StringOP::push_value_string(
1937
6
                                std::string_view {reinterpret_cast<const char*>(str.data),
1938
6
                                                  (size_t)offset},
1939
6
                                i, res_chars, res_offsets);
1940
53
                    } else {
1941
53
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
1942
53
                                                    res_chars, res_offsets);
1943
53
                    }
1944
59
                } else {
1945
                    // For multi-character delimiters
1946
                    // Use pre-created StringRef and StringSearch for constant delimiters
1947
15
                    StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value()
1948
15
                                                                  : StringRef(delimiter);
1949
15
                    const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr;
1950
15
                    StringSearch local_search(&delimiter_ref);
1951
15
                    if (!search_ptr) {
1952
15
                        search_ptr = &local_search;
1953
15
                    }
1954
1955
15
                    int32_t offset = -delimiter_size;
1956
15
                    int32_t num = 0;
1957
31
                    while (num < part_number) {
1958
17
                        size_t n = str.size - offset - delimiter_size;
1959
                        // search first match delimter_ref index from src string among str_offset to end
1960
17
                        const char* pos = search_ptr->search(str.data + offset + delimiter_size, n);
1961
17
                        if (pos < str.data + str.size) {
1962
16
                            offset = pos - str.data;
1963
16
                            num++;
1964
16
                        } else {
1965
1
                            offset = str.size;
1966
1
                            num = (num == 0) ? 0 : num + 1;
1967
1
                            break;
1968
1
                        }
1969
17
                    }
1970
1971
15
                    if (num == part_number) {
1972
14
                        StringOP::push_value_string(
1973
14
                                std::string_view {reinterpret_cast<const char*>(str.data),
1974
14
                                                  (size_t)offset},
1975
14
                                i, res_chars, res_offsets);
1976
14
                    } else {
1977
1
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
1978
1
                                                    res_chars, res_offsets);
1979
1
                    }
1980
15
                }
1981
74
            } else {
1982
32
                int neg_part_number = -part_number;
1983
32
                auto str_str = str.to_string();
1984
32
                int32_t offset = str.size;
1985
32
                int32_t pre_offset = offset;
1986
32
                int32_t num = 0;
1987
32
                auto substr = str_str;
1988
1989
                // Use pre-created StringRef for constant delimiters
1990
32
                StringRef delimiter_str =
1991
32
                        const_delimiter_ref
1992
32
                                ? const_delimiter_ref.value()
1993
32
                                : StringRef(reinterpret_cast<const char*>(delimiter.data),
1994
32
                                            delimiter.size);
1995
1996
36
                while (num <= neg_part_number && offset >= 0) {
1997
36
                    offset = (int)substr.rfind(delimiter_str, offset);
1998
36
                    if (offset != -1) {
1999
32
                        if (++num == neg_part_number) {
2000
28
                            break;
2001
28
                        }
2002
4
                        pre_offset = offset;
2003
4
                        offset = offset - 1;
2004
4
                        substr = str_str.substr(0, pre_offset);
2005
4
                    } else {
2006
4
                        break;
2007
4
                    }
2008
36
                }
2009
32
                num = (offset == -1 && num != 0) ? num + 1 : num;
2010
2011
32
                if (num == neg_part_number) {
2012
28
                    if (offset == -1) {
2013
0
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
2014
0
                                                    res_chars, res_offsets);
2015
28
                    } else {
2016
28
                        StringOP::push_value_string(
2017
28
                                std::string_view {str.data + offset + delimiter_size,
2018
28
                                                  str.size - offset - delimiter_size},
2019
28
                                i, res_chars, res_offsets);
2020
28
                    }
2021
28
                } else {
2022
4
                    StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars,
2023
4
                                                res_offsets);
2024
4
                }
2025
32
            }
2026
106
        }
2027
2028
25
        block.get_by_position(result).column = std::move(res);
2029
25
        return Status::OK();
2030
25
    }
2031
};
2032
2033
class FunctionSplitByString : public IFunction {
2034
public:
2035
    static constexpr auto name = "split_by_string";
2036
2037
80
    static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); }
2038
    using NullMapType = PaddedPODArray<UInt8>;
2039
2040
1
    String get_name() const override { return name; }
2041
2042
72
    bool is_variadic() const override { return false; }
2043
2044
71
    size_t get_number_of_arguments() const override { return 2; }
2045
2046
71
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2047
71
        DCHECK(is_string_type(arguments[0]->get_primitive_type()))
2048
0
                << "first argument for function: " << name << " should be string"
2049
0
                << " and arguments[0] is " << arguments[0]->get_name();
2050
71
        DCHECK(is_string_type(arguments[1]->get_primitive_type()))
2051
0
                << "second argument for function: " << name << " should be string"
2052
0
                << " and arguments[1] is " << arguments[1]->get_name();
2053
71
        return std::make_shared<DataTypeArray>(make_nullable(arguments[0]));
2054
71
    }
2055
2056
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
2057
101
                        uint32_t result, size_t input_rows_count) const override {
2058
101
        DCHECK_EQ(arguments.size(), 2);
2059
2060
101
        const auto& [src_column, left_const] =
2061
101
                unpack_if_const(block.get_by_position(arguments[0]).column);
2062
101
        const auto& [right_column, right_const] =
2063
101
                unpack_if_const(block.get_by_position(arguments[1]).column);
2064
2065
101
        DataTypePtr right_column_type = block.get_by_position(arguments[1]).type;
2066
101
        DataTypePtr src_column_type = block.get_by_position(arguments[0]).type;
2067
101
        auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(),
2068
101
                                                   ColumnArray::ColumnOffsets::create());
2069
2070
101
        dest_column_ptr->resize(0);
2071
101
        auto& dest_offsets = dest_column_ptr->get_offsets();
2072
2073
101
        auto& dest_nullable_col = assert_cast<ColumnNullable&>(dest_column_ptr->get_data());
2074
101
        auto* dest_nested_column = dest_nullable_col.get_nested_column_ptr().get();
2075
2076
101
        const auto* col_str = assert_cast<const ColumnString*>(src_column.get());
2077
2078
101
        const auto* col_delimiter = assert_cast<const ColumnString*>(right_column.get());
2079
2080
101
        std::visit(
2081
101
                [&](auto src_const, auto delimiter_const) {
2082
101
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
2083
101
                                                         *dest_nested_column, dest_offsets,
2084
101
                                                         input_rows_count);
2085
101
                },
_ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESF_EEDaSA_SB_
Line
Count
Source
2081
6
                [&](auto src_const, auto delimiter_const) {
2082
6
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
2083
6
                                                         *dest_nested_column, dest_offsets,
2084
6
                                                         input_rows_count);
2085
6
                },
_ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESE_IbLb1EEEEDaSA_SB_
Line
Count
Source
2081
87
                [&](auto src_const, auto delimiter_const) {
2082
87
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
2083
87
                                                         *dest_nested_column, dest_offsets,
2084
87
                                                         input_rows_count);
2085
87
                },
_ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESE_IbLb0EEEEDaSA_SB_
Line
Count
Source
2081
8
                [&](auto src_const, auto delimiter_const) {
2082
8
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
2083
8
                                                         *dest_nested_column, dest_offsets,
2084
8
                                                         input_rows_count);
2085
8
                },
Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESF_EEDaSA_SB_
2086
101
                make_bool_variant(left_const), make_bool_variant(right_const));
2087
2088
        // all elements in dest_nested_column are not null
2089
101
        dest_nullable_col.get_null_map_column().get_data().resize_fill(dest_nested_column->size(),
2090
101
                                                                       false);
2091
101
        block.replace_by_position(result, std::move(dest_column_ptr));
2092
2093
101
        return Status::OK();
2094
101
    }
2095
2096
private:
2097
    template <bool src_const, bool delimiter_const>
2098
    void _execute(const ColumnString& src_column_string, const ColumnString& delimiter_column,
2099
                  IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets,
2100
101
                  size_t size) const {
2101
101
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
2102
101
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
2103
101
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
2104
101
        column_string_chars.reserve(0);
2105
2106
101
        ColumnArray::Offset64 string_pos = 0;
2107
101
        ColumnArray::Offset64 dest_pos = 0;
2108
2109
101
        StringSearch search;
2110
101
        StringRef delimiter_ref_for_search;
2111
2112
101
        if constexpr (delimiter_const) {
2113
87
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
2114
87
            search.set_pattern(&delimiter_ref_for_search);
2115
87
        }
2116
2117
1.13k
        for (size_t i = 0; i < size; i++) {
2118
1.02k
            const StringRef str_ref =
2119
1.02k
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2120
1.02k
            const StringRef delimiter_ref =
2121
1.02k
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
2122
2123
1.02k
            if (str_ref.size == 0) {
2124
159
                dest_offsets.push_back(dest_pos);
2125
159
                continue;
2126
159
            }
2127
870
            if (delimiter_ref.size == 0) {
2128
20
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
2129
20
                                      string_pos, dest_pos);
2130
850
            } else {
2131
850
                if constexpr (!delimiter_const) {
2132
14
                    search.set_pattern(&delimiter_ref);
2133
14
                }
2134
51.6k
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2135
50.7k
                    const size_t str_offset = str_pos;
2136
50.7k
                    const size_t old_size = column_string_chars.size();
2137
                    // search first match delimter_ref index from src string among str_offset to end
2138
50.7k
                    const char* result_start =
2139
50.7k
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
2140
                    // compute split part size
2141
50.7k
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
2142
                    // save dist string split part
2143
50.7k
                    if (split_part_size > 0) {
2144
50.1k
                        const size_t new_size = old_size + split_part_size;
2145
50.1k
                        column_string_chars.resize(new_size);
2146
50.1k
                        memcpy_small_allow_read_write_overflow15(
2147
50.1k
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
2148
50.1k
                                split_part_size);
2149
                        // add dist string offset
2150
50.1k
                        string_pos += split_part_size;
2151
50.1k
                    }
2152
50.7k
                    column_string_offsets.push_back(string_pos);
2153
                    // array offset + 1
2154
50.7k
                    dest_pos++;
2155
                    // add src string str_pos to next search start
2156
50.7k
                    str_pos += split_part_size + delimiter_ref.size;
2157
50.7k
                }
2158
850
            }
2159
870
            dest_offsets.push_back(dest_pos);
2160
870
        }
2161
101
    }
_ZNK5doris21FunctionSplitByString8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2100
6
                  size_t size) const {
2101
6
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
2102
6
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
2103
6
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
2104
6
        column_string_chars.reserve(0);
2105
2106
6
        ColumnArray::Offset64 string_pos = 0;
2107
6
        ColumnArray::Offset64 dest_pos = 0;
2108
2109
6
        StringSearch search;
2110
6
        StringRef delimiter_ref_for_search;
2111
2112
        if constexpr (delimiter_const) {
2113
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
2114
            search.set_pattern(&delimiter_ref_for_search);
2115
        }
2116
2117
32
        for (size_t i = 0; i < size; i++) {
2118
26
            const StringRef str_ref =
2119
26
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2120
26
            const StringRef delimiter_ref =
2121
26
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
2122
2123
26
            if (str_ref.size == 0) {
2124
8
                dest_offsets.push_back(dest_pos);
2125
8
                continue;
2126
8
            }
2127
18
            if (delimiter_ref.size == 0) {
2128
4
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
2129
4
                                      string_pos, dest_pos);
2130
14
            } else {
2131
14
                if constexpr (!delimiter_const) {
2132
14
                    search.set_pattern(&delimiter_ref);
2133
14
                }
2134
71
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2135
57
                    const size_t str_offset = str_pos;
2136
57
                    const size_t old_size = column_string_chars.size();
2137
                    // search first match delimter_ref index from src string among str_offset to end
2138
57
                    const char* result_start =
2139
57
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
2140
                    // compute split part size
2141
57
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
2142
                    // save dist string split part
2143
57
                    if (split_part_size > 0) {
2144
44
                        const size_t new_size = old_size + split_part_size;
2145
44
                        column_string_chars.resize(new_size);
2146
44
                        memcpy_small_allow_read_write_overflow15(
2147
44
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
2148
44
                                split_part_size);
2149
                        // add dist string offset
2150
44
                        string_pos += split_part_size;
2151
44
                    }
2152
57
                    column_string_offsets.push_back(string_pos);
2153
                    // array offset + 1
2154
57
                    dest_pos++;
2155
                    // add src string str_pos to next search start
2156
57
                    str_pos += split_part_size + delimiter_ref.size;
2157
57
                }
2158
14
            }
2159
18
            dest_offsets.push_back(dest_pos);
2160
18
        }
2161
6
    }
_ZNK5doris21FunctionSplitByString8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2100
87
                  size_t size) const {
2101
87
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
2102
87
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
2103
87
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
2104
87
        column_string_chars.reserve(0);
2105
2106
87
        ColumnArray::Offset64 string_pos = 0;
2107
87
        ColumnArray::Offset64 dest_pos = 0;
2108
2109
87
        StringSearch search;
2110
87
        StringRef delimiter_ref_for_search;
2111
2112
87
        if constexpr (delimiter_const) {
2113
87
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
2114
87
            search.set_pattern(&delimiter_ref_for_search);
2115
87
        }
2116
2117
1.06k
        for (size_t i = 0; i < size; i++) {
2118
979
            const StringRef str_ref =
2119
979
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2120
979
            const StringRef delimiter_ref =
2121
979
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
2122
2123
979
            if (str_ref.size == 0) {
2124
135
                dest_offsets.push_back(dest_pos);
2125
135
                continue;
2126
135
            }
2127
844
            if (delimiter_ref.size == 0) {
2128
8
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
2129
8
                                      string_pos, dest_pos);
2130
836
            } else {
2131
                if constexpr (!delimiter_const) {
2132
                    search.set_pattern(&delimiter_ref);
2133
                }
2134
51.5k
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2135
50.7k
                    const size_t str_offset = str_pos;
2136
50.7k
                    const size_t old_size = column_string_chars.size();
2137
                    // search first match delimter_ref index from src string among str_offset to end
2138
50.7k
                    const char* result_start =
2139
50.7k
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
2140
                    // compute split part size
2141
50.7k
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
2142
                    // save dist string split part
2143
50.7k
                    if (split_part_size > 0) {
2144
50.1k
                        const size_t new_size = old_size + split_part_size;
2145
50.1k
                        column_string_chars.resize(new_size);
2146
50.1k
                        memcpy_small_allow_read_write_overflow15(
2147
50.1k
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
2148
50.1k
                                split_part_size);
2149
                        // add dist string offset
2150
50.1k
                        string_pos += split_part_size;
2151
50.1k
                    }
2152
50.7k
                    column_string_offsets.push_back(string_pos);
2153
                    // array offset + 1
2154
50.7k
                    dest_pos++;
2155
                    // add src string str_pos to next search start
2156
50.7k
                    str_pos += split_part_size + delimiter_ref.size;
2157
50.7k
                }
2158
836
            }
2159
844
            dest_offsets.push_back(dest_pos);
2160
844
        }
2161
87
    }
_ZNK5doris21FunctionSplitByString8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2100
8
                  size_t size) const {
2101
8
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
2102
8
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
2103
8
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
2104
8
        column_string_chars.reserve(0);
2105
2106
8
        ColumnArray::Offset64 string_pos = 0;
2107
8
        ColumnArray::Offset64 dest_pos = 0;
2108
2109
8
        StringSearch search;
2110
8
        StringRef delimiter_ref_for_search;
2111
2112
        if constexpr (delimiter_const) {
2113
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
2114
            search.set_pattern(&delimiter_ref_for_search);
2115
        }
2116
2117
32
        for (size_t i = 0; i < size; i++) {
2118
24
            const StringRef str_ref =
2119
24
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2120
24
            const StringRef delimiter_ref =
2121
24
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
2122
2123
24
            if (str_ref.size == 0) {
2124
16
                dest_offsets.push_back(dest_pos);
2125
16
                continue;
2126
16
            }
2127
8
            if (delimiter_ref.size == 0) {
2128
8
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
2129
8
                                      string_pos, dest_pos);
2130
8
            } else {
2131
0
                if constexpr (!delimiter_const) {
2132
0
                    search.set_pattern(&delimiter_ref);
2133
0
                }
2134
0
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2135
0
                    const size_t str_offset = str_pos;
2136
0
                    const size_t old_size = column_string_chars.size();
2137
                    // search first match delimter_ref index from src string among str_offset to end
2138
0
                    const char* result_start =
2139
0
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
2140
                    // compute split part size
2141
0
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
2142
                    // save dist string split part
2143
0
                    if (split_part_size > 0) {
2144
0
                        const size_t new_size = old_size + split_part_size;
2145
0
                        column_string_chars.resize(new_size);
2146
0
                        memcpy_small_allow_read_write_overflow15(
2147
0
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
2148
0
                                split_part_size);
2149
                        // add dist string offset
2150
0
                        string_pos += split_part_size;
2151
0
                    }
2152
0
                    column_string_offsets.push_back(string_pos);
2153
                    // array offset + 1
2154
0
                    dest_pos++;
2155
                    // add src string str_pos to next search start
2156
0
                    str_pos += split_part_size + delimiter_ref.size;
2157
0
                }
2158
0
            }
2159
8
            dest_offsets.push_back(dest_pos);
2160
8
        }
2161
8
    }
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
2162
2163
    void split_empty_delimiter(const StringRef& str_ref, ColumnString::Chars& column_string_chars,
2164
                               ColumnString::Offsets& column_string_offsets,
2165
                               ColumnArray::Offset64& string_pos,
2166
20
                               ColumnArray::Offset64& dest_pos) const {
2167
20
        const size_t old_size = column_string_chars.size();
2168
20
        const size_t new_size = old_size + str_ref.size;
2169
20
        column_string_chars.resize(new_size);
2170
20
        memcpy(column_string_chars.data() + old_size, str_ref.data, str_ref.size);
2171
20
        if (simd::VStringFunctions::is_ascii(str_ref)) {
2172
19
            const auto size = str_ref.size;
2173
2174
19
            const auto nested_old_size = column_string_offsets.size();
2175
19
            const auto nested_new_size = nested_old_size + size;
2176
19
            column_string_offsets.resize(nested_new_size);
2177
19
            std::iota(column_string_offsets.data() + nested_old_size,
2178
19
                      column_string_offsets.data() + nested_new_size, string_pos + 1);
2179
2180
19
            string_pos += size;
2181
19
            dest_pos += size;
2182
            // The above code is equivalent to the code in the following comment.
2183
            // for (size_t i = 0; i < str_ref.size; i++) {
2184
            //     string_pos++;
2185
            //     column_string_offsets.push_back(string_pos);
2186
            //     (*dest_nested_null_map).push_back(false);
2187
            //     dest_pos++;
2188
            // }
2189
19
        } else {
2190
8
            for (size_t i = 0, utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) {
2191
7
                utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]];
2192
2193
7
                string_pos += utf8_char_len;
2194
7
                column_string_offsets.push_back(string_pos);
2195
7
                dest_pos++;
2196
7
            }
2197
1
        }
2198
20
    }
2199
};
2200
2201
enum class FunctionCountSubStringType { TWO_ARGUMENTS, THREE_ARGUMENTS };
2202
2203
template <FunctionCountSubStringType type>
2204
class FunctionCountSubString : public IFunction {
2205
public:
2206
    static constexpr auto name = "count_substrings";
2207
    static constexpr auto arg_count = (type == FunctionCountSubStringType::TWO_ARGUMENTS) ? 2 : 3;
2208
2209
282
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE6createEv
Line
Count
Source
2209
76
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE6createEv
Line
Count
Source
2209
206
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
2210
    using NullMapType = PaddedPODArray<UInt8>;
2211
2212
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8get_nameB5cxx11Ev
2213
2214
0
    size_t get_number_of_arguments() const override { return arg_count; }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE23get_number_of_argumentsEv
2215
2216
264
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2217
264
        return std::make_shared<DataTypeInt32>();
2218
264
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2216
67
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2217
67
        return std::make_shared<DataTypeInt32>();
2218
67
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2216
197
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2217
197
        return std::make_shared<DataTypeInt32>();
2218
197
    }
2219
2220
16
    DataTypes get_variadic_argument_types_impl() const override {
2221
16
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2222
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
2223
8
        } else {
2224
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
2225
8
                    std::make_shared<DataTypeInt32>()};
2226
8
        }
2227
16
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE32get_variadic_argument_types_implEv
Line
Count
Source
2220
8
    DataTypes get_variadic_argument_types_impl() const override {
2221
8
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2222
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
2223
        } else {
2224
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
2225
                    std::make_shared<DataTypeInt32>()};
2226
        }
2227
8
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE32get_variadic_argument_types_implEv
Line
Count
Source
2220
8
    DataTypes get_variadic_argument_types_impl() const override {
2221
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2222
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
2223
8
        } else {
2224
8
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
2225
8
                    std::make_shared<DataTypeInt32>()};
2226
8
        }
2227
8
    }
2228
2229
266
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE11is_variadicEv
Line
Count
Source
2229
68
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE11is_variadicEv
Line
Count
Source
2229
198
    bool is_variadic() const override { return true; }
2230
2231
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
2232
234
                        uint32_t result, size_t input_rows_count) const override {
2233
234
        DCHECK(arg_count);
2234
234
        bool col_const[arg_count];
2235
234
        ColumnPtr argument_columns[arg_count];
2236
878
        for (int i = 0; i < arg_count; ++i) {
2237
644
            std::tie(argument_columns[i], col_const[i]) =
2238
644
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2239
644
        }
2240
2241
234
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
2242
234
        auto& dest_column_data = dest_column_ptr->get_data();
2243
2244
234
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2245
58
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2246
58
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2247
58
            std::visit(
2248
58
                    [&](auto str_const, auto pattern_const) {
2249
58
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2250
58
                                                           dest_column_data, input_rows_count);
2251
58
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESH_EEDaSC_SD_
Line
Count
Source
2248
32
                    [&](auto str_const, auto pattern_const) {
2249
32
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2250
32
                                                           dest_column_data, input_rows_count);
2251
32
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESG_IbLb1EEEEDaSC_SD_
Line
Count
Source
2248
13
                    [&](auto str_const, auto pattern_const) {
2249
13
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2250
13
                                                           dest_column_data, input_rows_count);
2251
13
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESG_IbLb0EEEEDaSC_SD_
Line
Count
Source
2248
13
                    [&](auto str_const, auto pattern_const) {
2249
13
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2250
13
                                                           dest_column_data, input_rows_count);
2251
13
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESH_EEDaSC_SD_
2252
58
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
2253
176
        } else {
2254
176
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2255
176
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2256
176
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
2257
176
            std::visit(
2258
176
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
176
                        _execute<str_const, pattern_const, start_pos_const>(
2260
176
                                src_column_string, pattern_column, start_pos_column,
2261
176
                                dest_column_data, input_rows_count);
2262
176
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
2258
36
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
36
                        _execute<str_const, pattern_const, start_pos_const>(
2260
36
                                src_column_string, pattern_column, start_pos_column,
2261
36
                                dest_column_data, input_rows_count);
2262
36
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
2258
29
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
29
                        _execute<str_const, pattern_const, start_pos_const>(
2260
29
                                src_column_string, pattern_column, start_pos_column,
2261
29
                                dest_column_data, input_rows_count);
2262
29
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
2258
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
22
                        _execute<str_const, pattern_const, start_pos_const>(
2260
22
                                src_column_string, pattern_column, start_pos_column,
2261
22
                                dest_column_data, input_rows_count);
2262
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
2258
23
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
23
                        _execute<str_const, pattern_const, start_pos_const>(
2260
23
                                src_column_string, pattern_column, start_pos_column,
2261
23
                                dest_column_data, input_rows_count);
2262
23
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
2258
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
22
                        _execute<str_const, pattern_const, start_pos_const>(
2260
22
                                src_column_string, pattern_column, start_pos_column,
2261
22
                                dest_column_data, input_rows_count);
2262
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
2258
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
22
                        _execute<str_const, pattern_const, start_pos_const>(
2260
22
                                src_column_string, pattern_column, start_pos_column,
2261
22
                                dest_column_data, input_rows_count);
2262
22
                    },
_ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
2258
22
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
22
                        _execute<str_const, pattern_const, start_pos_const>(
2260
22
                                src_column_string, pattern_column, start_pos_column,
2261
22
                                dest_column_data, input_rows_count);
2262
22
                    },
Unexecuted instantiation: _ZZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
2263
176
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
2264
176
                    make_bool_variant(col_const[2]));
2265
176
        }
2266
2267
234
        block.replace_by_position(result, std::move(dest_column_ptr));
2268
234
        return Status::OK();
2269
234
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2232
58
                        uint32_t result, size_t input_rows_count) const override {
2233
58
        DCHECK(arg_count);
2234
58
        bool col_const[arg_count];
2235
58
        ColumnPtr argument_columns[arg_count];
2236
174
        for (int i = 0; i < arg_count; ++i) {
2237
116
            std::tie(argument_columns[i], col_const[i]) =
2238
116
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2239
116
        }
2240
2241
58
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
2242
58
        auto& dest_column_data = dest_column_ptr->get_data();
2243
2244
58
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2245
58
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2246
58
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2247
58
            std::visit(
2248
58
                    [&](auto str_const, auto pattern_const) {
2249
58
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2250
58
                                                           dest_column_data, input_rows_count);
2251
58
                    },
2252
58
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
2253
        } else {
2254
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2255
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2256
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
2257
            std::visit(
2258
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
                        _execute<str_const, pattern_const, start_pos_const>(
2260
                                src_column_string, pattern_column, start_pos_column,
2261
                                dest_column_data, input_rows_count);
2262
                    },
2263
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
2264
                    make_bool_variant(col_const[2]));
2265
        }
2266
2267
58
        block.replace_by_position(result, std::move(dest_column_ptr));
2268
58
        return Status::OK();
2269
58
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2232
176
                        uint32_t result, size_t input_rows_count) const override {
2233
176
        DCHECK(arg_count);
2234
176
        bool col_const[arg_count];
2235
176
        ColumnPtr argument_columns[arg_count];
2236
704
        for (int i = 0; i < arg_count; ++i) {
2237
528
            std::tie(argument_columns[i], col_const[i]) =
2238
528
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2239
528
        }
2240
2241
176
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
2242
176
        auto& dest_column_data = dest_column_ptr->get_data();
2243
2244
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2245
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2246
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2247
            std::visit(
2248
                    [&](auto str_const, auto pattern_const) {
2249
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2250
                                                           dest_column_data, input_rows_count);
2251
                    },
2252
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
2253
176
        } else {
2254
176
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2255
176
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2256
176
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
2257
176
            std::visit(
2258
176
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
176
                        _execute<str_const, pattern_const, start_pos_const>(
2260
176
                                src_column_string, pattern_column, start_pos_column,
2261
176
                                dest_column_data, input_rows_count);
2262
176
                    },
2263
176
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
2264
176
                    make_bool_variant(col_const[2]));
2265
176
        }
2266
2267
176
        block.replace_by_position(result, std::move(dest_column_ptr));
2268
176
        return Status::OK();
2269
176
    }
2270
2271
private:
2272
    template <bool src_const, bool pattern_const>
2273
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
2274
58
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2275
241
        for (size_t i = 0; i < size; i++) {
2276
183
            const StringRef str_ref =
2277
183
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2278
2279
183
            const StringRef pattern_ref =
2280
183
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2281
183
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2282
183
        }
2283
58
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2274
32
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2275
133
        for (size_t i = 0; i < size; i++) {
2276
101
            const StringRef str_ref =
2277
101
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2278
2279
101
            const StringRef pattern_ref =
2280
101
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2281
101
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2282
101
        }
2283
32
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2274
13
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2275
54
        for (size_t i = 0; i < size; i++) {
2276
41
            const StringRef str_ref =
2277
41
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2278
2279
41
            const StringRef pattern_ref =
2280
41
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2281
41
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2282
41
        }
2283
13
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2274
13
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2275
54
        for (size_t i = 0; i < size; i++) {
2276
41
            const StringRef str_ref =
2277
41
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2278
2279
41
            const StringRef pattern_ref =
2280
41
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2281
41
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2282
41
        }
2283
13
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
2284
2285
    template <bool src_const, bool pattern_const, bool start_pos_const>
2286
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
2287
                  const ColumnInt32& start_pos_column, ColumnInt32::Container& dest_column_data,
2288
176
                  size_t size) const {
2289
411
        for (size_t i = 0; i < size; i++) {
2290
235
            const StringRef str_ref =
2291
235
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
235
            const StringRef pattern_ref =
2293
235
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
235
            int32_t start_pos =
2296
235
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
235
            const char* p = str_ref.begin();
2299
235
            const char* end = str_ref.end();
2300
235
            int char_size = 0;
2301
1.47k
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
1.24k
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
1.24k
            }
2304
235
            const auto start_byte_len = p - str_ref.begin();
2305
2306
235
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
134
                dest_column_data[i] = 0;
2308
134
            } else {
2309
101
                dest_column_data[i] =
2310
101
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
101
            }
2312
235
        }
2313
176
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
36
                  size_t size) const {
2289
97
        for (size_t i = 0; i < size; i++) {
2290
61
            const StringRef str_ref =
2291
61
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
61
            const StringRef pattern_ref =
2293
61
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
61
            int32_t start_pos =
2296
61
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
61
            const char* p = str_ref.begin();
2299
61
            const char* end = str_ref.end();
2300
61
            int char_size = 0;
2301
456
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
395
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
395
            }
2304
61
            const auto start_byte_len = p - str_ref.begin();
2305
2306
61
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
38
                dest_column_data[i] = 0;
2308
38
            } else {
2309
23
                dest_column_data[i] =
2310
23
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
23
            }
2312
61
        }
2313
36
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
29
                  size_t size) const {
2289
78
        for (size_t i = 0; i < size; i++) {
2290
49
            const StringRef str_ref =
2291
49
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
49
            const StringRef pattern_ref =
2293
49
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
49
            int32_t start_pos =
2296
49
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
49
            const char* p = str_ref.begin();
2299
49
            const char* end = str_ref.end();
2300
49
            int char_size = 0;
2301
242
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
193
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
193
            }
2304
49
            const auto start_byte_len = p - str_ref.begin();
2305
2306
49
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
22
                dest_column_data[i] = 0;
2308
27
            } else {
2309
27
                dest_column_data[i] =
2310
27
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
27
            }
2312
49
        }
2313
29
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
22
                  size_t size) const {
2289
44
        for (size_t i = 0; i < size; i++) {
2290
22
            const StringRef str_ref =
2291
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
22
            const StringRef pattern_ref =
2293
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
22
            int32_t start_pos =
2296
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
22
            const char* p = str_ref.begin();
2299
22
            const char* end = str_ref.end();
2300
22
            int char_size = 0;
2301
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
129
            }
2304
22
            const auto start_byte_len = p - str_ref.begin();
2305
2306
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
14
                dest_column_data[i] = 0;
2308
14
            } else {
2309
8
                dest_column_data[i] =
2310
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
8
            }
2312
22
        }
2313
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
23
                  size_t size) const {
2289
60
        for (size_t i = 0; i < size; i++) {
2290
37
            const StringRef str_ref =
2291
37
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
37
            const StringRef pattern_ref =
2293
37
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
37
            int32_t start_pos =
2296
37
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
37
            const char* p = str_ref.begin();
2299
37
            const char* end = str_ref.end();
2300
37
            int char_size = 0;
2301
177
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
140
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
140
            }
2304
37
            const auto start_byte_len = p - str_ref.begin();
2305
2306
37
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
18
                dest_column_data[i] = 0;
2308
19
            } else {
2309
19
                dest_column_data[i] =
2310
19
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
19
            }
2312
37
        }
2313
23
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
22
                  size_t size) const {
2289
44
        for (size_t i = 0; i < size; i++) {
2290
22
            const StringRef str_ref =
2291
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
22
            const StringRef pattern_ref =
2293
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
22
            int32_t start_pos =
2296
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
22
            const char* p = str_ref.begin();
2299
22
            const char* end = str_ref.end();
2300
22
            int char_size = 0;
2301
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
129
            }
2304
22
            const auto start_byte_len = p - str_ref.begin();
2305
2306
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
14
                dest_column_data[i] = 0;
2308
14
            } else {
2309
8
                dest_column_data[i] =
2310
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
8
            }
2312
22
        }
2313
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
22
                  size_t size) const {
2289
44
        for (size_t i = 0; i < size; i++) {
2290
22
            const StringRef str_ref =
2291
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
22
            const StringRef pattern_ref =
2293
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
22
            int32_t start_pos =
2296
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
22
            const char* p = str_ref.begin();
2299
22
            const char* end = str_ref.end();
2300
22
            int char_size = 0;
2301
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
129
            }
2304
22
            const auto start_byte_len = p - str_ref.begin();
2305
2306
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
14
                dest_column_data[i] = 0;
2308
14
            } else {
2309
8
                dest_column_data[i] =
2310
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
8
            }
2312
22
        }
2313
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
22
                  size_t size) const {
2289
44
        for (size_t i = 0; i < size; i++) {
2290
22
            const StringRef str_ref =
2291
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
22
            const StringRef pattern_ref =
2293
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
22
            int32_t start_pos =
2296
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
22
            const char* p = str_ref.begin();
2299
22
            const char* end = str_ref.end();
2300
22
            int char_size = 0;
2301
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
129
            }
2304
22
            const auto start_byte_len = p - str_ref.begin();
2305
2306
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
14
                dest_column_data[i] = 0;
2308
14
            } else {
2309
8
                dest_column_data[i] =
2310
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
8
            }
2312
22
        }
2313
22
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
2314
2315
529
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
2316
529
        size_t old_size = pos;
2317
529
        size_t str_size = str_ref.size;
2318
2.20k
        while (pos < str_size &&
2319
2.20k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
2320
1.98k
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
2321
1.67k
            pos++;
2322
1.67k
        }
2323
529
        return pos - old_size;
2324
529
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8find_posEmNS_9StringRefES3_
Line
Count
Source
2315
291
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
2316
291
        size_t old_size = pos;
2317
291
        size_t str_size = str_ref.size;
2318
1.05k
        while (pos < str_size &&
2319
1.05k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
2320
933
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
2321
763
            pos++;
2322
763
        }
2323
291
        return pos - old_size;
2324
291
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8find_posEmNS_9StringRefES3_
Line
Count
Source
2315
238
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
2316
238
        size_t old_size = pos;
2317
238
        size_t str_size = str_ref.size;
2318
1.15k
        while (pos < str_size &&
2319
1.15k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
2320
1.05k
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
2321
914
            pos++;
2322
914
        }
2323
238
        return pos - old_size;
2324
238
    }
2325
2326
284
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
2327
284
        int count = 0;
2328
284
        if (str_ref.size == 0 || pattern_ref.size == 0) {
2329
64
            return 0;
2330
220
        } else {
2331
529
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2332
529
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
2333
529
                if (res_pos == (str_ref.size - str_pos)) {
2334
220
                    break; // not find
2335
220
                }
2336
309
                count++;
2337
309
                str_pos = str_pos + res_pos + pattern_ref.size;
2338
309
            }
2339
220
        }
2340
220
        return count;
2341
284
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE14find_str_countENS_9StringRefES3_
Line
Count
Source
2326
183
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
2327
183
        int count = 0;
2328
183
        if (str_ref.size == 0 || pattern_ref.size == 0) {
2329
62
            return 0;
2330
121
        } else {
2331
291
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2332
291
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
2333
291
                if (res_pos == (str_ref.size - str_pos)) {
2334
121
                    break; // not find
2335
121
                }
2336
170
                count++;
2337
170
                str_pos = str_pos + res_pos + pattern_ref.size;
2338
170
            }
2339
121
        }
2340
121
        return count;
2341
183
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE14find_str_countENS_9StringRefES3_
Line
Count
Source
2326
101
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
2327
101
        int count = 0;
2328
101
        if (str_ref.size == 0 || pattern_ref.size == 0) {
2329
2
            return 0;
2330
99
        } else {
2331
238
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2332
238
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
2333
238
                if (res_pos == (str_ref.size - str_pos)) {
2334
99
                    break; // not find
2335
99
                }
2336
139
                count++;
2337
139
                str_pos = str_pos + res_pos + pattern_ref.size;
2338
139
            }
2339
99
        }
2340
99
        return count;
2341
101
    }
2342
};
2343
2344
struct SM3Sum {
2345
    static constexpr auto name = "sm3sum";
2346
    using ObjectData = SM3Digest;
2347
};
2348
2349
struct MD5Sum {
2350
    static constexpr auto name = "md5sum";
2351
    using ObjectData = Md5Digest;
2352
};
2353
2354
template <typename Impl>
2355
class FunctionStringDigestMulti : public IFunction {
2356
public:
2357
    static constexpr auto name = Impl::name;
2358
252
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
_ZN5doris25FunctionStringDigestMultiINS_6SM3SumEE6createEv
Line
Count
Source
2358
126
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
_ZN5doris25FunctionStringDigestMultiINS_6MD5SumEE6createEv
Line
Count
Source
2358
126
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
2359
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE8get_nameB5cxx11Ev
2360
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE23get_number_of_argumentsEv
2361
236
    bool is_variadic() const override { return true; }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE11is_variadicEv
Line
Count
Source
2361
118
    bool is_variadic() const override { return true; }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE11is_variadicEv
Line
Count
Source
2361
118
    bool is_variadic() const override { return true; }
2362
2363
234
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2364
234
        return std::make_shared<DataTypeString>();
2365
234
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2363
117
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2364
117
        return std::make_shared<DataTypeString>();
2365
117
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2363
117
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2364
117
        return std::make_shared<DataTypeString>();
2365
117
    }
2366
2367
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2368
243
                        uint32_t result, size_t input_rows_count) const override {
2369
243
        DCHECK_GE(arguments.size(), 1);
2370
2371
243
        auto res = ColumnString::create();
2372
243
        auto& res_data = res->get_chars();
2373
243
        auto& res_offset = res->get_offsets();
2374
243
        res_offset.resize(input_rows_count);
2375
2376
243
        std::vector<ColumnPtr> argument_columns(arguments.size());
2377
243
        std::vector<uint8_t> is_const(arguments.size(), 0);
2378
705
        for (size_t i = 0; i < arguments.size(); ++i) {
2379
462
            std::tie(argument_columns[i], is_const[i]) =
2380
462
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2381
462
        }
2382
2383
243
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
2384
163
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
2385
163
                                         res_data, res_offset);
2386
163
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
2387
80
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
2388
80
                                            res_data, res_offset);
2389
80
        } else {
2390
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2391
0
                                        argument_columns[0]->get_name(), get_name());
2392
0
        }
2393
2394
243
        block.replace_by_position(result, std::move(res));
2395
243
        return Status::OK();
2396
243
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2368
119
                        uint32_t result, size_t input_rows_count) const override {
2369
119
        DCHECK_GE(arguments.size(), 1);
2370
2371
119
        auto res = ColumnString::create();
2372
119
        auto& res_data = res->get_chars();
2373
119
        auto& res_offset = res->get_offsets();
2374
119
        res_offset.resize(input_rows_count);
2375
2376
119
        std::vector<ColumnPtr> argument_columns(arguments.size());
2377
119
        std::vector<uint8_t> is_const(arguments.size(), 0);
2378
348
        for (size_t i = 0; i < arguments.size(); ++i) {
2379
229
            std::tie(argument_columns[i], is_const[i]) =
2380
229
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2381
229
        }
2382
2383
119
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
2384
79
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
2385
79
                                         res_data, res_offset);
2386
79
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
2387
40
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
2388
40
                                            res_data, res_offset);
2389
40
        } else {
2390
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2391
0
                                        argument_columns[0]->get_name(), get_name());
2392
0
        }
2393
2394
119
        block.replace_by_position(result, std::move(res));
2395
119
        return Status::OK();
2396
119
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2368
124
                        uint32_t result, size_t input_rows_count) const override {
2369
124
        DCHECK_GE(arguments.size(), 1);
2370
2371
124
        auto res = ColumnString::create();
2372
124
        auto& res_data = res->get_chars();
2373
124
        auto& res_offset = res->get_offsets();
2374
124
        res_offset.resize(input_rows_count);
2375
2376
124
        std::vector<ColumnPtr> argument_columns(arguments.size());
2377
124
        std::vector<uint8_t> is_const(arguments.size(), 0);
2378
357
        for (size_t i = 0; i < arguments.size(); ++i) {
2379
233
            std::tie(argument_columns[i], is_const[i]) =
2380
233
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2381
233
        }
2382
2383
124
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
2384
84
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
2385
84
                                         res_data, res_offset);
2386
84
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
2387
40
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
2388
40
                                            res_data, res_offset);
2389
40
        } else {
2390
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2391
0
                                        argument_columns[0]->get_name(), get_name());
2392
0
        }
2393
2394
124
        block.replace_by_position(result, std::move(res));
2395
124
        return Status::OK();
2396
124
    }
2397
2398
private:
2399
    template <typename ColumnType>
2400
    void vector_execute(Block& block, size_t input_rows_count,
2401
                        const std::vector<ColumnPtr>& argument_columns,
2402
                        const std::vector<uint8_t>& is_const, ColumnString::Chars& res_data,
2403
243
                        ColumnString::Offsets& res_offset) const {
2404
243
        using ObjectData = typename Impl::ObjectData;
2405
677
        for (size_t i = 0; i < input_rows_count; ++i) {
2406
434
            ObjectData digest;
2407
1.12k
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2408
689
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2409
689
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2410
689
                if (data_ref.size < 1) {
2411
122
                    continue;
2412
122
                }
2413
567
                digest.update(data_ref.data, data_ref.size);
2414
567
            }
2415
434
            digest.digest();
2416
434
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2417
434
                                        i, res_data, res_offset);
2418
434
        }
2419
243
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE
Line
Count
Source
2403
79
                        ColumnString::Offsets& res_offset) const {
2404
79
        using ObjectData = typename Impl::ObjectData;
2405
239
        for (size_t i = 0; i < input_rows_count; ++i) {
2406
160
            ObjectData digest;
2407
385
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2408
225
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2409
225
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2410
225
                if (data_ref.size < 1) {
2411
30
                    continue;
2412
30
                }
2413
195
                digest.update(data_ref.data, data_ref.size);
2414
195
            }
2415
160
            digest.digest();
2416
160
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2417
160
                                        i, res_data, res_offset);
2418
160
        }
2419
79
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE
Line
Count
Source
2403
40
                        ColumnString::Offsets& res_offset) const {
2404
40
        using ObjectData = typename Impl::ObjectData;
2405
93
        for (size_t i = 0; i < input_rows_count; ++i) {
2406
53
            ObjectData digest;
2407
169
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2408
116
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2409
116
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2410
116
                if (data_ref.size < 1) {
2411
30
                    continue;
2412
30
                }
2413
86
                digest.update(data_ref.data, data_ref.size);
2414
86
            }
2415
53
            digest.digest();
2416
53
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2417
53
                                        i, res_data, res_offset);
2418
53
        }
2419
40
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE
Line
Count
Source
2403
84
                        ColumnString::Offsets& res_offset) const {
2404
84
        using ObjectData = typename Impl::ObjectData;
2405
252
        for (size_t i = 0; i < input_rows_count; ++i) {
2406
168
            ObjectData digest;
2407
400
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2408
232
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2409
232
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2410
232
                if (data_ref.size < 1) {
2411
32
                    continue;
2412
32
                }
2413
200
                digest.update(data_ref.data, data_ref.size);
2414
200
            }
2415
168
            digest.digest();
2416
168
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2417
168
                                        i, res_data, res_offset);
2418
168
        }
2419
84
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE
Line
Count
Source
2403
40
                        ColumnString::Offsets& res_offset) const {
2404
40
        using ObjectData = typename Impl::ObjectData;
2405
93
        for (size_t i = 0; i < input_rows_count; ++i) {
2406
53
            ObjectData digest;
2407
169
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2408
116
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2409
116
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2410
116
                if (data_ref.size < 1) {
2411
30
                    continue;
2412
30
                }
2413
86
                digest.update(data_ref.data, data_ref.size);
2414
86
            }
2415
53
            digest.digest();
2416
53
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2417
53
                                        i, res_data, res_offset);
2418
53
        }
2419
40
    }
2420
};
2421
2422
class FunctionStringDigestSHA1 : public IFunction {
2423
public:
2424
    static constexpr auto name = "sha1";
2425
24
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA1>(); }
2426
0
    String get_name() const override { return name; }
2427
0
    size_t get_number_of_arguments() const override { return 1; }
2428
16
    bool is_variadic() const override { return true; }
2429
2430
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2431
15
        return std::make_shared<DataTypeString>();
2432
15
    }
2433
2434
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2435
14
                        uint32_t result, size_t input_rows_count) const override {
2436
14
        DCHECK_EQ(arguments.size(), 1);
2437
14
        ColumnPtr data_col = block.get_by_position(arguments[0]).column;
2438
2439
14
        auto res_col = ColumnString::create();
2440
14
        auto& res_data = res_col->get_chars();
2441
14
        auto& res_offset = res_col->get_offsets();
2442
14
        res_offset.resize(input_rows_count);
2443
14
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
2444
9
            vector_execute(str_col, input_rows_count, res_data, res_offset);
2445
9
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
2446
5
            vector_execute(vb_col, input_rows_count, res_data, res_offset);
2447
5
        } else {
2448
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2449
0
                                        data_col->get_name(), get_name());
2450
0
        }
2451
2452
14
        block.replace_by_position(result, std::move(res_col));
2453
14
        return Status::OK();
2454
14
    }
2455
2456
private:
2457
    template <typename ColumnType>
2458
    void vector_execute(const ColumnType* col, size_t input_rows_count,
2459
14
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2460
14
        SHA1Digest digest;
2461
37
        for (size_t i = 0; i < input_rows_count; ++i) {
2462
23
            StringRef data_ref = col->get_data_at(i);
2463
23
            digest.reset(data_ref.data, data_ref.size);
2464
23
            std::string_view ans = digest.digest();
2465
2466
23
            StringOP::push_value_string(ans, i, res_data, res_offset);
2467
23
        }
2468
14
    }
_ZNK5doris24FunctionStringDigestSHA114vector_executeINS_9ColumnStrIjEEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
Line
Count
Source
2459
9
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2460
9
        SHA1Digest digest;
2461
23
        for (size_t i = 0; i < input_rows_count; ++i) {
2462
14
            StringRef data_ref = col->get_data_at(i);
2463
14
            digest.reset(data_ref.data, data_ref.size);
2464
14
            std::string_view ans = digest.digest();
2465
2466
14
            StringOP::push_value_string(ans, i, res_data, res_offset);
2467
14
        }
2468
9
    }
_ZNK5doris24FunctionStringDigestSHA114vector_executeINS_15ColumnVarbinaryEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS6_IjLm4096ES9_Lm16ELm15EEE
Line
Count
Source
2459
5
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2460
5
        SHA1Digest digest;
2461
14
        for (size_t i = 0; i < input_rows_count; ++i) {
2462
9
            StringRef data_ref = col->get_data_at(i);
2463
9
            digest.reset(data_ref.data, data_ref.size);
2464
9
            std::string_view ans = digest.digest();
2465
2466
9
            StringOP::push_value_string(ans, i, res_data, res_offset);
2467
9
        }
2468
5
    }
2469
};
2470
2471
class FunctionStringDigestSHA2 : public IFunction {
2472
public:
2473
    static constexpr auto name = "sha2";
2474
13
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA2>(); }
2475
0
    String get_name() const override { return name; }
2476
0
    size_t get_number_of_arguments() const override { return 2; }
2477
5
    bool is_variadic() const override { return true; }
2478
2479
4
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2480
4
        return std::make_shared<DataTypeString>();
2481
4
    }
2482
2483
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2484
6
                        uint32_t result, size_t input_rows_count) const override {
2485
6
        DCHECK(!is_column_const(*block.get_by_position(arguments[0]).column));
2486
2487
6
        ColumnPtr data_col = block.get_by_position(arguments[0]).column;
2488
2489
6
        [[maybe_unused]] const auto& [right_column, right_const] =
2490
6
                unpack_if_const(block.get_by_position(arguments[1]).column);
2491
6
        auto digest_length = assert_cast<const ColumnInt32*>(right_column.get())->get_data()[0];
2492
2493
6
        auto res_col = ColumnString::create();
2494
6
        auto& res_data = res_col->get_chars();
2495
6
        auto& res_offset = res_col->get_offsets();
2496
6
        res_offset.resize(input_rows_count);
2497
2498
6
        if (digest_length == 224) {
2499
1
            execute_base<SHA224Digest>(data_col, input_rows_count, res_data, res_offset);
2500
5
        } else if (digest_length == 256) {
2501
2
            execute_base<SHA256Digest>(data_col, input_rows_count, res_data, res_offset);
2502
3
        } else if (digest_length == 384) {
2503
1
            execute_base<SHA384Digest>(data_col, input_rows_count, res_data, res_offset);
2504
2
        } else if (digest_length == 512) {
2505
2
            execute_base<SHA512Digest>(data_col, input_rows_count, res_data, res_offset);
2506
2
        } else {
2507
0
            return Status::InvalidArgument(
2508
0
                    "sha2's digest length only support 224/256/384/512 but meet {}", digest_length);
2509
0
        }
2510
2511
6
        block.replace_by_position(result, std::move(res_col));
2512
6
        return Status::OK();
2513
6
    }
2514
2515
private:
2516
    template <typename T>
2517
    void execute_base(ColumnPtr data_col, int input_rows_count, ColumnString::Chars& res_data,
2518
6
                      ColumnString::Offsets& res_offset) const {
2519
6
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
2520
6
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
2521
6
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
2522
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
2523
0
        } else {
2524
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
2525
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
2526
0
                            get_name());
2527
0
        }
2528
6
    }
_ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA224DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
2518
1
                      ColumnString::Offsets& res_offset) const {
2519
1
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
2520
1
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
2521
1
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
2522
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
2523
0
        } else {
2524
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
2525
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
2526
0
                            get_name());
2527
0
        }
2528
1
    }
_ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA256DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
2518
2
                      ColumnString::Offsets& res_offset) const {
2519
2
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
2520
2
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
2521
2
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
2522
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
2523
0
        } else {
2524
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
2525
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
2526
0
                            get_name());
2527
0
        }
2528
2
    }
_ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA384DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
2518
1
                      ColumnString::Offsets& res_offset) const {
2519
1
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
2520
1
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
2521
1
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
2522
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
2523
0
        } else {
2524
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
2525
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
2526
0
                            get_name());
2527
0
        }
2528
1
    }
_ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA512DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
2518
2
                      ColumnString::Offsets& res_offset) const {
2519
2
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
2520
2
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
2521
2
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
2522
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
2523
0
        } else {
2524
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
2525
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
2526
0
                            get_name());
2527
0
        }
2528
2
    }
2529
2530
    template <typename DigestType, typename ColumnType>
2531
    void vector_execute(const ColumnType* col, size_t input_rows_count,
2532
6
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2533
6
        DigestType digest;
2534
14
        for (size_t i = 0; i < input_rows_count; ++i) {
2535
8
            StringRef data_ref = col->get_data_at(i);
2536
8
            digest.reset(data_ref.data, data_ref.size);
2537
8
            std::string_view ans = digest.digest();
2538
2539
8
            StringOP::push_value_string(ans, i, res_data, res_offset);
2540
8
        }
2541
6
    }
_ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
2532
1
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2533
1
        DigestType digest;
2534
2
        for (size_t i = 0; i < input_rows_count; ++i) {
2535
1
            StringRef data_ref = col->get_data_at(i);
2536
1
            digest.reset(data_ref.data, data_ref.size);
2537
1
            std::string_view ans = digest.digest();
2538
2539
1
            StringOP::push_value_string(ans, i, res_data, res_offset);
2540
1
        }
2541
1
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
_ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
2532
2
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2533
2
        DigestType digest;
2534
5
        for (size_t i = 0; i < input_rows_count; ++i) {
2535
3
            StringRef data_ref = col->get_data_at(i);
2536
3
            digest.reset(data_ref.data, data_ref.size);
2537
3
            std::string_view ans = digest.digest();
2538
2539
3
            StringOP::push_value_string(ans, i, res_data, res_offset);
2540
3
        }
2541
2
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
_ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
2532
1
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2533
1
        DigestType digest;
2534
2
        for (size_t i = 0; i < input_rows_count; ++i) {
2535
1
            StringRef data_ref = col->get_data_at(i);
2536
1
            digest.reset(data_ref.data, data_ref.size);
2537
1
            std::string_view ans = digest.digest();
2538
2539
1
            StringOP::push_value_string(ans, i, res_data, res_offset);
2540
1
        }
2541
1
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
_ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Line
Count
Source
2532
2
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2533
2
        DigestType digest;
2534
5
        for (size_t i = 0; i < input_rows_count; ++i) {
2535
3
            StringRef data_ref = col->get_data_at(i);
2536
3
            digest.reset(data_ref.data, data_ref.size);
2537
3
            std::string_view ans = digest.digest();
2538
2539
3
            StringOP::push_value_string(ans, i, res_data, res_offset);
2540
3
        }
2541
2
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
2542
};
2543
2544
class FunctionExtractURLParameter : public IFunction {
2545
public:
2546
    static constexpr auto name = "extract_url_parameter";
2547
55
    static FunctionPtr create() { return std::make_shared<FunctionExtractURLParameter>(); }
2548
1
    String get_name() const override { return name; }
2549
46
    size_t get_number_of_arguments() const override { return 2; }
2550
2551
46
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2552
46
        return std::make_shared<DataTypeString>();
2553
46
    }
2554
2555
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2556
52
                        uint32_t result, size_t input_rows_count) const override {
2557
52
        auto col_url =
2558
52
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
2559
52
        auto col_parameter =
2560
52
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
2561
52
        auto url_col = assert_cast<const ColumnString*>(col_url.get());
2562
52
        auto parameter_col = assert_cast<const ColumnString*>(col_parameter.get());
2563
2564
52
        ColumnString::MutablePtr col_res = ColumnString::create();
2565
2566
132
        for (int i = 0; i < input_rows_count; ++i) {
2567
80
            auto source = url_col->get_data_at(i);
2568
80
            auto param = parameter_col->get_data_at(i);
2569
80
            auto res = extract_url(source, param);
2570
2571
80
            col_res->insert_data(res.data, res.size);
2572
80
        }
2573
2574
52
        block.replace_by_position(result, std::move(col_res));
2575
52
        return Status::OK();
2576
52
    }
2577
2578
private:
2579
80
    StringRef extract_url(StringRef url, StringRef parameter) const {
2580
80
        if (url.size == 0 || parameter.size == 0) {
2581
8
            return StringRef("", 0);
2582
8
        }
2583
72
        return UrlParser::extract_url(url, parameter);
2584
80
    }
2585
};
2586
2587
class FunctionStringParseUrl : public IFunction {
2588
public:
2589
    static constexpr auto name = "parse_url";
2590
121
    static FunctionPtr create() { return std::make_shared<FunctionStringParseUrl>(); }
2591
0
    String get_name() const override { return name; }
2592
0
    size_t get_number_of_arguments() const override { return 0; }
2593
113
    bool is_variadic() const override { return true; }
2594
2595
112
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2596
112
        return make_nullable(std::make_shared<DataTypeString>());
2597
112
    }
2598
2599
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2600
136
                        uint32_t result, size_t input_rows_count) const override {
2601
136
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2602
136
        auto& null_map_data = null_map->get_data();
2603
136
        DCHECK_GE(3, arguments.size());
2604
136
        auto res = ColumnString::create();
2605
136
        auto& res_offsets = res->get_offsets();
2606
136
        auto& res_chars = res->get_chars();
2607
136
        res_offsets.resize(input_rows_count);
2608
2609
136
        size_t argument_size = arguments.size();
2610
136
        const bool has_key = argument_size == 3;
2611
2612
136
        std::vector<ColumnPtr> argument_columns(argument_size);
2613
136
        std::vector<UInt8> col_const(argument_size);
2614
453
        for (size_t i = 0; i < argument_size; ++i) {
2615
317
            std::tie(argument_columns[i], col_const[i]) =
2616
317
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2617
317
        }
2618
2619
136
        const auto* url_col = assert_cast<const ColumnString*>(argument_columns[0].get());
2620
136
        const auto* part_col = assert_cast<const ColumnString*>(argument_columns[1].get());
2621
136
        const bool part_const = col_const[1];
2622
136
        std::vector<UrlParser::UrlPart> url_parts;
2623
136
        const int part_nums = part_const ? 1 : input_rows_count;
2624
2625
136
        url_parts.resize(part_nums);
2626
295
        for (int i = 0; i < part_nums; i++) {
2627
159
            StringRef part = part_col->get_data_at(i);
2628
159
            UrlParser::UrlPart url_part = UrlParser::get_url_part(part);
2629
159
            if (url_part == UrlParser::INVALID) {
2630
0
                return Status::RuntimeError("Invalid URL part: {}\n{}",
2631
0
                                            std::string(part.data, part.size),
2632
0
                                            "(Valid URL parts are 'PROTOCOL', 'HOST', "
2633
0
                                            "'PATH', 'REF', 'AUTHORITY', "
2634
0
                                            "'FILE', 'USERINFO', 'PORT' and 'QUERY')");
2635
0
            }
2636
159
            url_parts[i] = url_part;
2637
159
        }
2638
2639
136
        if (has_key) {
2640
45
            const bool url_const = col_const[0];
2641
45
            const bool key_const = col_const[2];
2642
45
            const auto* key_col = assert_cast<const ColumnString*>(argument_columns[2].get());
2643
45
            RETURN_IF_ERROR(std::visit(
2644
45
                    [&](auto url_const, auto part_const, auto key_const) {
2645
45
                        return vector_parse_key<url_const, part_const, key_const>(
2646
45
                                url_col, url_parts, key_col, input_rows_count, null_map_data,
2647
45
                                res_chars, res_offsets);
2648
45
                    },
2649
45
                    make_bool_variant(url_const), make_bool_variant(part_const),
2650
45
                    make_bool_variant(key_const)));
2651
91
        } else {
2652
91
            const bool url_const = col_const[0];
2653
91
            RETURN_IF_ERROR(std::visit(
2654
91
                    [&](auto url_const, auto part_const) {
2655
91
                        return vector_parse<url_const, part_const>(url_col, url_parts,
2656
91
                                                                   input_rows_count, null_map_data,
2657
91
                                                                   res_chars, res_offsets);
2658
91
                    },
2659
91
                    make_bool_variant(url_const), make_bool_variant(part_const)));
2660
91
        }
2661
136
        block.get_by_position(result).column =
2662
136
                ColumnNullable::create(std::move(res), std::move(null_map));
2663
136
        return Status::OK();
2664
136
    }
2665
    template <bool url_const, bool part_const>
2666
    static Status vector_parse(const ColumnString* url_col,
2667
                               std::vector<UrlParser::UrlPart>& url_parts, const int size,
2668
                               ColumnUInt8::Container& null_map_data,
2669
91
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2670
238
        for (size_t i = 0; i < size; ++i) {
2671
147
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2672
147
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2673
147
            StringRef parse_res;
2674
147
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2675
71
                if (parse_res.empty()) [[unlikely]] {
2676
5
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2677
5
                    continue;
2678
5
                }
2679
66
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2680
66
                                            res_chars, res_offsets);
2681
76
            } else {
2682
76
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2683
76
            }
2684
147
        }
2685
91
        return Status::OK();
2686
91
    }
_ZN5doris22FunctionStringParseUrl12vector_parseILb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2669
31
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2670
82
        for (size_t i = 0; i < size; ++i) {
2671
51
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2672
51
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2673
51
            StringRef parse_res;
2674
51
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2675
35
                if (parse_res.empty()) [[unlikely]] {
2676
2
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2677
2
                    continue;
2678
2
                }
2679
33
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2680
33
                                            res_chars, res_offsets);
2681
33
            } else {
2682
16
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2683
16
            }
2684
51
        }
2685
31
        return Status::OK();
2686
31
    }
_ZN5doris22FunctionStringParseUrl12vector_parseILb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2669
39
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2670
114
        for (size_t i = 0; i < size; ++i) {
2671
75
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2672
75
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2673
75
            StringRef parse_res;
2674
75
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2675
20
                if (parse_res.empty()) [[unlikely]] {
2676
2
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2677
2
                    continue;
2678
2
                }
2679
18
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2680
18
                                            res_chars, res_offsets);
2681
55
            } else {
2682
55
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2683
55
            }
2684
75
        }
2685
39
        return Status::OK();
2686
39
    }
_ZN5doris22FunctionStringParseUrl12vector_parseILb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2669
21
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2670
42
        for (size_t i = 0; i < size; ++i) {
2671
21
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2672
21
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2673
21
            StringRef parse_res;
2674
21
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2675
16
                if (parse_res.empty()) [[unlikely]] {
2676
1
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2677
1
                    continue;
2678
1
                }
2679
15
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2680
15
                                            res_chars, res_offsets);
2681
15
            } else {
2682
5
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2683
5
            }
2684
21
        }
2685
21
        return Status::OK();
2686
21
    }
Unexecuted instantiation: _ZN5doris22FunctionStringParseUrl12vector_parseILb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
2687
    template <bool url_const, bool part_const, bool key_const>
2688
    static Status vector_parse_key(const ColumnString* url_col,
2689
                                   std::vector<UrlParser::UrlPart>& url_parts,
2690
                                   const ColumnString* key_col, const int size,
2691
                                   ColumnUInt8::Container& null_map_data,
2692
                                   ColumnString::Chars& res_chars,
2693
45
                                   ColumnString::Offsets& res_offsets) {
2694
127
        for (size_t i = 0; i < size; ++i) {
2695
82
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
82
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
82
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
82
            StringRef parse_res;
2699
82
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
16
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
16
                                            res_chars, res_offsets);
2702
66
            } else {
2703
66
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
66
                continue;
2705
66
            }
2706
82
        }
2707
45
        return Status::OK();
2708
45
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
5
                                   ColumnString::Offsets& res_offsets) {
2694
13
        for (size_t i = 0; i < size; ++i) {
2695
8
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
8
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
8
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
8
            StringRef parse_res;
2699
8
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
4
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
4
                                            res_chars, res_offsets);
2702
4
            } else {
2703
4
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
4
                continue;
2705
4
            }
2706
8
        }
2707
5
        return Status::OK();
2708
5
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
4
                                   ColumnString::Offsets& res_offsets) {
2694
8
        for (size_t i = 0; i < size; ++i) {
2695
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
4
            StringRef parse_res;
2699
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
2
            } else {
2703
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
2
                continue;
2705
2
            }
2706
4
        }
2707
4
        return Status::OK();
2708
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
4
                                   ColumnString::Offsets& res_offsets) {
2694
8
        for (size_t i = 0; i < size; ++i) {
2695
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
4
            StringRef parse_res;
2699
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
2
            } else {
2703
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
2
                continue;
2705
2
            }
2706
4
        }
2707
4
        return Status::OK();
2708
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
20
                                   ColumnString::Offsets& res_offsets) {
2694
74
        for (size_t i = 0; i < size; ++i) {
2695
54
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
54
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
54
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
54
            StringRef parse_res;
2699
54
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
52
            } else {
2703
52
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
52
                continue;
2705
52
            }
2706
54
        }
2707
20
        return Status::OK();
2708
20
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
4
                                   ColumnString::Offsets& res_offsets) {
2694
8
        for (size_t i = 0; i < size; ++i) {
2695
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
4
            StringRef parse_res;
2699
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
2
            } else {
2703
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
2
                continue;
2705
2
            }
2706
4
        }
2707
4
        return Status::OK();
2708
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
4
                                   ColumnString::Offsets& res_offsets) {
2694
8
        for (size_t i = 0; i < size; ++i) {
2695
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
4
            StringRef parse_res;
2699
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
2
            } else {
2703
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
2
                continue;
2705
2
            }
2706
4
        }
2707
4
        return Status::OK();
2708
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
4
                                   ColumnString::Offsets& res_offsets) {
2694
8
        for (size_t i = 0; i < size; ++i) {
2695
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
4
            StringRef parse_res;
2699
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
2
            } else {
2703
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
2
                continue;
2705
2
            }
2706
4
        }
2707
4
        return Status::OK();
2708
4
    }
Unexecuted instantiation: _ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
2709
};
2710
2711
class FunctionUrlDecode : public IFunction {
2712
public:
2713
    static constexpr auto name = "url_decode";
2714
12
    static FunctionPtr create() { return std::make_shared<FunctionUrlDecode>(); }
2715
1
    String get_name() const override { return name; }
2716
3
    size_t get_number_of_arguments() const override { return 1; }
2717
3
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2718
3
        return std::make_shared<DataTypeString>();
2719
3
    }
2720
2721
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2722
15
                        uint32_t result, size_t input_rows_count) const override {
2723
15
        auto res = ColumnString::create();
2724
15
        res->get_offsets().reserve(input_rows_count);
2725
2726
15
        const auto* url_col =
2727
15
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
2728
2729
15
        std::string decoded_url;
2730
33
        for (size_t i = 0; i < input_rows_count; ++i) {
2731
18
            auto url = url_col->get_data_at(i);
2732
18
            if (!url_decode(url.to_string(), &decoded_url)) {
2733
0
                return Status::InternalError("Decode url failed");
2734
0
            }
2735
18
            res->insert_data(decoded_url.data(), decoded_url.size());
2736
18
            decoded_url.clear();
2737
18
        }
2738
2739
15
        block.get_by_position(result).column = std::move(res);
2740
15
        return Status::OK();
2741
15
    }
2742
};
2743
2744
class FunctionUrlEncode : public IFunction {
2745
public:
2746
    static constexpr auto name = "url_encode";
2747
16
    static FunctionPtr create() { return std::make_shared<FunctionUrlEncode>(); }
2748
1
    String get_name() const override { return name; }
2749
7
    size_t get_number_of_arguments() const override { return 1; }
2750
7
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2751
7
        return std::make_shared<DataTypeString>();
2752
7
    }
2753
2754
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2755
19
                        uint32_t result, size_t input_rows_count) const override {
2756
19
        auto res = ColumnString::create();
2757
19
        res->get_offsets().reserve(input_rows_count);
2758
2759
19
        const auto* url_col =
2760
19
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
2761
2762
19
        std::string encoded_url;
2763
43
        for (size_t i = 0; i < input_rows_count; ++i) {
2764
24
            auto url = url_col->get_data_at(i);
2765
24
            url_encode(url.to_string_view(), &encoded_url);
2766
24
            res->insert_data(encoded_url.data(), encoded_url.size());
2767
24
            encoded_url.clear();
2768
24
        }
2769
2770
19
        block.get_by_position(result).column = std::move(res);
2771
19
        return Status::OK();
2772
19
    }
2773
};
2774
2775
class FunctionRandomBytes : public IFunction {
2776
public:
2777
    static constexpr auto name = "random_bytes";
2778
14
    static FunctionPtr create() { return std::make_shared<FunctionRandomBytes>(); }
2779
1
    String get_name() const override { return name; }
2780
5
    size_t get_number_of_arguments() const override { return 1; }
2781
6
    bool is_variadic() const override { return false; }
2782
2783
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2784
5
        return std::make_shared<DataTypeString>();
2785
5
    }
2786
2787
15
    bool use_default_implementation_for_constants() const final { return false; }
2788
2789
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2790
4
                        uint32_t result, size_t input_rows_count) const override {
2791
4
        auto res = ColumnString::create();
2792
4
        auto& res_offsets = res->get_offsets();
2793
4
        auto& res_chars = res->get_chars();
2794
4
        res_offsets.resize(input_rows_count);
2795
2796
4
        auto [arg_col, arg_const] = unpack_if_const(block.get_by_position(arguments[0]).column);
2797
4
        const auto* length_col = assert_cast<const ColumnInt32*>(arg_col.get());
2798
2799
4
        if (arg_const) {
2800
3
            res_chars.reserve(input_rows_count * (length_col->get_element(0) + 2));
2801
3
        }
2802
2803
4
        std::vector<uint8_t, Allocator_<uint8_t>> random_bytes;
2804
4
        std::random_device rd;
2805
4
        std::mt19937 gen(rd());
2806
2807
4
        std::uniform_int_distribution<unsigned short> distribution(0, 255);
2808
19
        for (size_t i = 0; i < input_rows_count; ++i) {
2809
16
            size_t index = index_check_const(i, arg_const);
2810
16
            if (length_col->get_element(index) < 0) [[unlikely]] {
2811
1
                return Status::InvalidArgument("argument {} of function {} at row {} was invalid.",
2812
1
                                               length_col->get_element(index), name, index);
2813
1
            }
2814
15
            random_bytes.resize(length_col->get_element(index));
2815
2816
117
            for (auto& byte : random_bytes) {
2817
117
                byte = distribution(gen) & 0xFF;
2818
117
            }
2819
2820
15
            std::basic_ostringstream<char, std::char_traits<char>, Allocator_<char>> oss;
2821
117
            for (const auto& byte : random_bytes) {
2822
117
                oss << std::setw(2) << std::setfill('0') << std::hex << static_cast<int>(byte);
2823
117
            }
2824
2825
15
            StringOP::push_value_string("0x" + oss.str(), i, res_chars, res_offsets);
2826
15
            random_bytes.clear();
2827
15
        }
2828
2829
3
        block.get_by_position(result).column = std::move(res);
2830
2831
3
        return Status::OK();
2832
4
    }
2833
};
2834
2835
template <typename Impl>
2836
class FunctionMoneyFormat : public IFunction {
2837
public:
2838
    static constexpr auto name = "money_format";
2839
204
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE6createEv
Line
Count
Source
2839
27
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE6createEv
Line
Count
Source
2839
36
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE6createEv
Line
Count
Source
2839
14
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE6createEv
Line
Count
Source
2839
11
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE6createEv
Line
Count
Source
2839
61
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE6createEv
Line
Count
Source
2839
27
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE6createEv
Line
Count
Source
2839
19
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE6createEv
Line
Count
Source
2839
9
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
2840
8
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
2841
2842
131
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
131
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
131
        return std::make_shared<DataTypeString>();
2849
131
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2842
18
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
18
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
18
        return std::make_shared<DataTypeString>();
2849
18
    }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2842
27
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
27
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
27
        return std::make_shared<DataTypeString>();
2849
27
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2842
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
5
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
5
        return std::make_shared<DataTypeString>();
2849
5
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
2842
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
1
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
1
        return std::make_shared<DataTypeString>();
2849
1
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
2842
52
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
52
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
52
        return std::make_shared<DataTypeString>();
2849
52
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
2842
18
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
18
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
18
        return std::make_shared<DataTypeString>();
2849
18
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
2842
10
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
10
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
10
        return std::make_shared<DataTypeString>();
2849
10
    }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
2850
64
    DataTypes get_variadic_argument_types_impl() const override {
2851
64
        return Impl::get_variadic_argument_types();
2852
64
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
8
    DataTypes get_variadic_argument_types_impl() const override {
2851
8
        return Impl::get_variadic_argument_types();
2852
8
    }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
8
    DataTypes get_variadic_argument_types_impl() const override {
2851
8
        return Impl::get_variadic_argument_types();
2852
8
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
8
    DataTypes get_variadic_argument_types_impl() const override {
2851
8
        return Impl::get_variadic_argument_types();
2852
8
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
8
    DataTypes get_variadic_argument_types_impl() const override {
2851
8
        return Impl::get_variadic_argument_types();
2852
8
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
8
    DataTypes get_variadic_argument_types_impl() const override {
2851
8
        return Impl::get_variadic_argument_types();
2852
8
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
8
    DataTypes get_variadic_argument_types_impl() const override {
2851
8
        return Impl::get_variadic_argument_types();
2852
8
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
8
    DataTypes get_variadic_argument_types_impl() const override {
2851
8
        return Impl::get_variadic_argument_types();
2852
8
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
8
    DataTypes get_variadic_argument_types_impl() const override {
2851
8
        return Impl::get_variadic_argument_types();
2852
8
    }
2853
131
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE23get_number_of_argumentsEv
Line
Count
Source
2853
18
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE23get_number_of_argumentsEv
Line
Count
Source
2853
27
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE23get_number_of_argumentsEv
Line
Count
Source
2853
5
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE23get_number_of_argumentsEv
Line
Count
Source
2853
1
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE23get_number_of_argumentsEv
Line
Count
Source
2853
52
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE23get_number_of_argumentsEv
Line
Count
Source
2853
18
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE23get_number_of_argumentsEv
Line
Count
Source
2853
10
    size_t get_number_of_arguments() const override { return 1; }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE23get_number_of_argumentsEv
2854
2855
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2856
155
                        uint32_t result, size_t input_rows_count) const override {
2857
155
        auto res_column = ColumnString::create();
2858
155
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
155
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
155
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
155
        block.replace_by_position(result, std::move(res_column));
2865
155
        return Status::OK();
2866
155
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
24
                        uint32_t result, size_t input_rows_count) const override {
2857
24
        auto res_column = ColumnString::create();
2858
24
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
24
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
24
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
24
        block.replace_by_position(result, std::move(res_column));
2865
24
        return Status::OK();
2866
24
    }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
33
                        uint32_t result, size_t input_rows_count) const override {
2857
33
        auto res_column = ColumnString::create();
2858
33
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
33
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
33
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
33
        block.replace_by_position(result, std::move(res_column));
2865
33
        return Status::OK();
2866
33
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
11
                        uint32_t result, size_t input_rows_count) const override {
2857
11
        auto res_column = ColumnString::create();
2858
11
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
11
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
11
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
11
        block.replace_by_position(result, std::move(res_column));
2865
11
        return Status::OK();
2866
11
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
2
                        uint32_t result, size_t input_rows_count) const override {
2857
2
        auto res_column = ColumnString::create();
2858
2
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
2
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
2
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
2
        block.replace_by_position(result, std::move(res_column));
2865
2
        return Status::OK();
2866
2
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
58
                        uint32_t result, size_t input_rows_count) const override {
2857
58
        auto res_column = ColumnString::create();
2858
58
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
58
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
58
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
58
        block.replace_by_position(result, std::move(res_column));
2865
58
        return Status::OK();
2866
58
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
17
                        uint32_t result, size_t input_rows_count) const override {
2857
17
        auto res_column = ColumnString::create();
2858
17
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
17
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
17
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
17
        block.replace_by_position(result, std::move(res_column));
2865
17
        return Status::OK();
2866
17
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
10
                        uint32_t result, size_t input_rows_count) const override {
2857
10
        auto res_column = ColumnString::create();
2858
10
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
10
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
10
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
10
        block.replace_by_position(result, std::move(res_column));
2865
10
        return Status::OK();
2866
10
    }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
2867
};
2868
2869
// ----------------------------------------------------------------------
// SimpleItoaWithCommas()
//    Renders the integer `i` in decimal with a ',' between every group
//    of three digits (counted from the right) and a leading '-' for
//    negative values.
//
//    The text is written backwards, ending at buffer + buffer_size.
//    No terminating '\0' is written and the buffer size is not checked,
//    so the caller must supply a buffer large enough for the widest
//    value of T.
//
//    Return value: pointer to the first character of the rendered text;
//    the text occupies [returned pointer, buffer + buffer_size).
// ----------------------------------------------------------------------
template <typename T>
char* SimpleItoaWithCommas(T i, char* buffer, int32_t buffer_size) {
    using Magnitude = std::make_unsigned_t<T>;

    const bool negative = i < 0;
    // Work on the unsigned counterpart of T so that the most negative value,
    // whose magnitude does not fit in T, can still be negated.
    Magnitude remaining = i;
    if (negative) {
        remaining = 0 - remaining;
    }

    char* cursor = buffer + buffer_size;
    int digits_in_group = 0;
    // do/while so that the value 0 still yields a single '0'.
    do {
        if (digits_in_group == 3) {
            *--cursor = ',';
            digits_in_group = 0;
        }
        *--cursor = static_cast<char>('0' + remaining % 10);
        remaining /= 10;
        ++digits_in_group;
    } while (remaining != 0);

    if (negative) {
        *--cursor = '-';
    }
    return cursor;
}
_ZN5doris20SimpleItoaWithCommasIlEEPcT_S1_i
Line
Count
Source
2878
207
char* SimpleItoaWithCommas(T i, char* buffer, int32_t buffer_size) {
2879
207
    char* p = buffer + buffer_size;
2880
    // Need to use unsigned T instead of T to correctly handle
2881
207
    std::make_unsigned_t<T> n = i;
2882
207
    if (i < 0) {
2883
39
        n = 0 - n;
2884
39
    }
2885
207
    *--p = '0' + n % 10; // this case deals with the number "0"
2886
207
    n /= 10;
2887
379
    while (n) {
2888
241
        *--p = '0' + n % 10;
2889
241
        n /= 10;
2890
241
        if (n == 0) {
2891
36
            break;
2892
36
        }
2893
2894
205
        *--p = '0' + n % 10;
2895
205
        n /= 10;
2896
205
        if (n == 0) {
2897
33
            break;
2898
33
        }
2899
2900
172
        *--p = ',';
2901
172
        *--p = '0' + n % 10;
2902
172
        n /= 10;
2903
        // For this unrolling, we check if n == 0 in the main while loop
2904
172
    }
2905
207
    if (i < 0) {
2906
39
        *--p = '-';
2907
39
    }
2908
207
    return p;
2909
207
}
_ZN5doris20SimpleItoaWithCommasInEEPcT_S1_i
Line
Count
Source
2878
72
char* SimpleItoaWithCommas(T i, char* buffer, int32_t buffer_size) {
2879
72
    char* p = buffer + buffer_size;
2880
    // Need to use unsigned T instead of T to correctly handle
2881
72
    std::make_unsigned_t<T> n = i;
2882
72
    if (i < 0) {
2883
15
        n = 0 - n;
2884
15
    }
2885
72
    *--p = '0' + n % 10; // this case deals with the number "0"
2886
72
    n /= 10;
2887
220
    while (n) {
2888
183
        *--p = '0' + n % 10;
2889
183
        n /= 10;
2890
183
        if (n == 0) {
2891
18
            break;
2892
18
        }
2893
2894
165
        *--p = '0' + n % 10;
2895
165
        n /= 10;
2896
165
        if (n == 0) {
2897
17
            break;
2898
17
        }
2899
2900
148
        *--p = ',';
2901
148
        *--p = '0' + n % 10;
2902
148
        n /= 10;
2903
        // For this unrolling, we check if n == 0 in the main while loop
2904
148
    }
2905
72
    if (i < 0) {
2906
15
        *--p = '-';
2907
15
    }
2908
72
    return p;
2909
72
}
2910
2911
namespace MoneyFormat {
2912
2913
0
// Scratch-buffer size used when formatting the integer part of a Decimal(9, 0).
constexpr size_t MAX_FORMAT_LEN_DEC32() {
    // At most 9 decimal digits.
    constexpr size_t max_digits = 9;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare bytes
    // (presumably ".dd" -- TODO confirm). The total is doubled purely as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
2918
2919
0
// Scratch-buffer size used when formatting the integer part of a Decimal(18, 0).
constexpr size_t MAX_FORMAT_LEN_DEC64() {
    // At most 18 decimal digits.
    constexpr size_t max_digits = 18;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare bytes
    // (presumably ".dd" -- TODO confirm). The total is doubled purely as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
2924
2925
0
// Scratch-buffer size used when formatting the integer part of a DecimalV2 value.
constexpr size_t MAX_FORMAT_LEN_DEC128V2() {
    // DecimalV2 is budgeted at 27 decimal digits.
    constexpr size_t max_digits = 27;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare bytes
    // (presumably ".dd" -- TODO confirm). The total is doubled purely as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
2930
2931
0
// Scratch-buffer size used when formatting the integer part of a Decimal(38, 0).
constexpr size_t MAX_FORMAT_LEN_DEC128V3() {
    // Decimal(38, 0) has 38 digits; 39 are budgeted, which matches the INT128 bound.
    constexpr size_t max_digits = 39;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare bytes
    // (presumably ".dd" -- TODO confirm). The total is doubled purely as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
2936
2937
0
// Scratch-buffer size used when formatting a 64-bit integer.
constexpr size_t MAX_FORMAT_LEN_INT64() {
    // INT64_MIN = -9223372036854775808 has 19 digits; 20 are budgeted here.
    constexpr size_t max_digits = 20;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare bytes
    // (presumably ".dd" -- TODO confirm). The total is doubled purely as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
2942
2943
0
// Scratch-buffer size used when formatting a 128-bit integer.
constexpr size_t MAX_FORMAT_LEN_INT128() {
    // INT128_MIN = -170141183460469231731687303715884105728 has 39 digits.
    constexpr size_t max_digits = 39;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare bytes
    // (presumably ".dd" -- TODO confirm). The total is doubled as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
2947
2948
template <typename T, size_t N>
2949
200
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
2950
200
    static_assert(std::is_integral<T>::value);
2951
200
    const bool is_negative = int_value < 0 || frac_value < 0;
2952
2953
    // do round to frac_part
2954
    // magic number 2: since we need to round frac_part to 2 digits
2955
200
    if (scale > 2) {
2956
72
        DCHECK(scale <= 38);
2957
        // do rounding, so we need to reserve 3 digits.
2958
72
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
2959
        // do devide first to avoid overflow
2960
        // after round frac_value will be positive by design.
2961
72
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
2962
72
        frac_value /= 10;
2963
128
    } else if (scale < 2) {
2964
107
        DCHECK(frac_value < 100);
2965
        // since scale <= 2, overflow is impossiable
2966
107
        frac_value = frac_value * common::exp10_i32(2 - scale);
2967
107
    }
2968
2969
200
    if (frac_value == 100) {
2970
11
        if (is_negative) {
2971
6
            int_value -= 1;
2972
6
        } else {
2973
5
            int_value += 1;
2974
5
        }
2975
11
        frac_value = 0;
2976
11
    }
2977
2978
200
    bool append_sign_manually = false;
2979
200
    if (is_negative && int_value == 0) {
2980
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
2981
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
2982
        // this is why we introduce argument append_sing_manually.
2983
8
        append_sign_manually = true;
2984
8
    }
2985
2986
200
    char local[N];
2987
200
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
2988
200
    const Int32 integer_str_len = N - (p - local);
2989
200
    const Int32 frac_str_len = 2;
2990
200
    const Int32 whole_decimal_str_len =
2991
200
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
2992
2993
200
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
2994
    // Modify a string passed via stringref
2995
200
    char* result_data = const_cast<char*>(result.data);
2996
2997
200
    if (append_sign_manually) {
2998
8
        memset(result_data, '-', 1);
2999
8
    }
3000
3001
200
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3002
200
    *(result_data + whole_decimal_str_len - 3) = '.';
3003
200
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3004
200
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3005
200
    return result;
3006
200
};
_ZN5doris11MoneyFormat15do_money_formatIlLm60EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
2949
52
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
2950
52
    static_assert(std::is_integral<T>::value);
2951
52
    const bool is_negative = int_value < 0 || frac_value < 0;
2952
2953
    // do round to frac_part
2954
    // magic number 2: since we need to round frac_part to 2 digits
2955
52
    if (scale > 2) {
2956
0
        DCHECK(scale <= 38);
2957
        // do rounding, so we need to reserve 3 digits.
2958
0
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
2959
        // do devide first to avoid overflow
2960
        // after round frac_value will be positive by design.
2961
0
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
2962
0
        frac_value /= 10;
2963
52
    } else if (scale < 2) {
2964
52
        DCHECK(frac_value < 100);
2965
        // since scale <= 2, overflow is impossiable
2966
52
        frac_value = frac_value * common::exp10_i32(2 - scale);
2967
52
    }
2968
2969
52
    if (frac_value == 100) {
2970
0
        if (is_negative) {
2971
0
            int_value -= 1;
2972
0
        } else {
2973
0
            int_value += 1;
2974
0
        }
2975
0
        frac_value = 0;
2976
0
    }
2977
2978
52
    bool append_sign_manually = false;
2979
52
    if (is_negative && int_value == 0) {
2980
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
2981
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
2982
        // this is why we introduce argument append_sing_manually.
2983
0
        append_sign_manually = true;
2984
0
    }
2985
2986
52
    char local[N];
2987
52
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
2988
52
    const Int32 integer_str_len = N - (p - local);
2989
52
    const Int32 frac_str_len = 2;
2990
52
    const Int32 whole_decimal_str_len =
2991
52
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
2992
2993
52
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
2994
    // Modify a string passed via stringref
2995
52
    char* result_data = const_cast<char*>(result.data);
2996
2997
52
    if (append_sign_manually) {
2998
0
        memset(result_data, '-', 1);
2999
0
    }
3000
3001
52
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3002
52
    *(result_data + whole_decimal_str_len - 3) = '.';
3003
52
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3004
52
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3005
52
    return result;
3006
52
};
_ZN5doris11MoneyFormat15do_money_formatInLm112EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
2949
40
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
2950
40
    static_assert(std::is_integral<T>::value);
2951
40
    const bool is_negative = int_value < 0 || frac_value < 0;
2952
2953
    // do round to frac_part
2954
    // magic number 2: since we need to round frac_part to 2 digits
2955
40
    if (scale > 2) {
2956
4
        DCHECK(scale <= 38);
2957
        // do rounding, so we need to reserve 3 digits.
2958
4
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
2959
        // do devide first to avoid overflow
2960
        // after round frac_value will be positive by design.
2961
4
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
2962
4
        frac_value /= 10;
2963
36
    } else if (scale < 2) {
2964
34
        DCHECK(frac_value < 100);
2965
        // since scale <= 2, overflow is impossiable
2966
34
        frac_value = frac_value * common::exp10_i32(2 - scale);
2967
34
    }
2968
2969
40
    if (frac_value == 100) {
2970
2
        if (is_negative) {
2971
1
            int_value -= 1;
2972
1
        } else {
2973
1
            int_value += 1;
2974
1
        }
2975
2
        frac_value = 0;
2976
2
    }
2977
2978
40
    bool append_sign_manually = false;
2979
40
    if (is_negative && int_value == 0) {
2980
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
2981
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
2982
        // this is why we introduce argument append_sing_manually.
2983
0
        append_sign_manually = true;
2984
0
    }
2985
2986
40
    char local[N];
2987
40
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
2988
40
    const Int32 integer_str_len = N - (p - local);
2989
40
    const Int32 frac_str_len = 2;
2990
40
    const Int32 whole_decimal_str_len =
2991
40
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
2992
2993
40
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
2994
    // Modify a string passed via stringref
2995
40
    char* result_data = const_cast<char*>(result.data);
2996
2997
40
    if (append_sign_manually) {
2998
0
        memset(result_data, '-', 1);
2999
0
    }
3000
3001
40
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3002
40
    *(result_data + whole_decimal_str_len - 3) = '.';
3003
40
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3004
40
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3005
40
    return result;
3006
40
};
_ZN5doris11MoneyFormat15do_money_formatInLm80EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
2949
14
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
2950
14
    static_assert(std::is_integral<T>::value);
2951
14
    const bool is_negative = int_value < 0 || frac_value < 0;
2952
2953
    // do round to frac_part
2954
    // magic number 2: since we need to round frac_part to 2 digits
2955
14
    if (scale > 2) {
2956
14
        DCHECK(scale <= 38);
2957
        // do rounding, so we need to reserve 3 digits.
2958
14
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
2959
        // do devide first to avoid overflow
2960
        // after round frac_value will be positive by design.
2961
14
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
2962
14
        frac_value /= 10;
2963
14
    } else if (scale < 2) {
2964
0
        DCHECK(frac_value < 100);
2965
        // since scale <= 2, overflow is impossiable
2966
0
        frac_value = frac_value * common::exp10_i32(2 - scale);
2967
0
    }
2968
2969
14
    if (frac_value == 100) {
2970
3
        if (is_negative) {
2971
2
            int_value -= 1;
2972
2
        } else {
2973
1
            int_value += 1;
2974
1
        }
2975
3
        frac_value = 0;
2976
3
    }
2977
2978
14
    bool append_sign_manually = false;
2979
14
    if (is_negative && int_value == 0) {
2980
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
2981
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
2982
        // this is why we introduce argument append_sing_manually.
2983
2
        append_sign_manually = true;
2984
2
    }
2985
2986
14
    char local[N];
2987
14
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
2988
14
    const Int32 integer_str_len = N - (p - local);
2989
14
    const Int32 frac_str_len = 2;
2990
14
    const Int32 whole_decimal_str_len =
2991
14
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
2992
2993
14
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
2994
    // Modify a string passed via stringref
2995
14
    char* result_data = const_cast<char*>(result.data);
2996
2997
14
    if (append_sign_manually) {
2998
2
        memset(result_data, '-', 1);
2999
2
    }
3000
3001
14
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3002
14
    *(result_data + whole_decimal_str_len - 3) = '.';
3003
14
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3004
14
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3005
14
    return result;
3006
14
};
_ZN5doris11MoneyFormat15do_money_formatIlLm32EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
2949
75
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
2950
75
    static_assert(std::is_integral<T>::value);
2951
75
    const bool is_negative = int_value < 0 || frac_value < 0;
2952
2953
    // do round to frac_part
2954
    // magic number 2: since we need to round frac_part to 2 digits
2955
75
    if (scale > 2) {
2956
43
        DCHECK(scale <= 38);
2957
        // do rounding, so we need to reserve 3 digits.
2958
43
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
2959
        // do devide first to avoid overflow
2960
        // after round frac_value will be positive by design.
2961
43
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
2962
43
        frac_value /= 10;
2963
43
    } else if (scale < 2) {
2964
15
        DCHECK(frac_value < 100);
2965
        // since scale <= 2, overflow is impossiable
2966
15
        frac_value = frac_value * common::exp10_i32(2 - scale);
2967
15
    }
2968
2969
75
    if (frac_value == 100) {
2970
2
        if (is_negative) {
2971
1
            int_value -= 1;
2972
1
        } else {
2973
1
            int_value += 1;
2974
1
        }
2975
2
        frac_value = 0;
2976
2
    }
2977
2978
75
    bool append_sign_manually = false;
2979
75
    if (is_negative && int_value == 0) {
2980
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
2981
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
2982
        // this is why we introduce argument append_sing_manually.
2983
6
        append_sign_manually = true;
2984
6
    }
2985
2986
75
    char local[N];
2987
75
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
2988
75
    const Int32 integer_str_len = N - (p - local);
2989
75
    const Int32 frac_str_len = 2;
2990
75
    const Int32 whole_decimal_str_len =
2991
75
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
2992
2993
75
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
2994
    // Modify a string passed via stringref
2995
75
    char* result_data = const_cast<char*>(result.data);
2996
2997
75
    if (append_sign_manually) {
2998
6
        memset(result_data, '-', 1);
2999
6
    }
3000
3001
75
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3002
75
    *(result_data + whole_decimal_str_len - 3) = '.';
3003
75
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3004
75
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3005
75
    return result;
3006
75
};
_ZN5doris11MoneyFormat15do_money_formatIlLm56EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
2949
19
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
2950
19
    static_assert(std::is_integral<T>::value);
2951
19
    const bool is_negative = int_value < 0 || frac_value < 0;
2952
2953
    // do round to frac_part
2954
    // magic number 2: since we need to round frac_part to 2 digits
2955
19
    if (scale > 2) {
2956
11
        DCHECK(scale <= 38);
2957
        // do rounding, so we need to reserve 3 digits.
2958
11
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
2959
        // do devide first to avoid overflow
2960
        // after round frac_value will be positive by design.
2961
11
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
2962
11
        frac_value /= 10;
2963
11
    } else if (scale < 2) {
2964
6
        DCHECK(frac_value < 100);
2965
        // since scale <= 2, overflow is impossiable
2966
6
        frac_value = frac_value * common::exp10_i32(2 - scale);
2967
6
    }
2968
2969
19
    if (frac_value == 100) {
2970
4
        if (is_negative) {
2971
2
            int_value -= 1;
2972
2
        } else {
2973
2
            int_value += 1;
2974
2
        }
2975
4
        frac_value = 0;
2976
4
    }
2977
2978
19
    bool append_sign_manually = false;
2979
19
    if (is_negative && int_value == 0) {
2980
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
2981
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
2982
        // this is why we introduce argument append_sing_manually.
2983
0
        append_sign_manually = true;
2984
0
    }
2985
2986
19
    char local[N];
2987
19
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
2988
19
    const Int32 integer_str_len = N - (p - local);
2989
19
    const Int32 frac_str_len = 2;
2990
19
    const Int32 whole_decimal_str_len =
2991
19
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
2992
2993
19
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
2994
    // Modify a string passed via stringref
2995
19
    char* result_data = const_cast<char*>(result.data);
2996
2997
19
    if (append_sign_manually) {
2998
0
        memset(result_data, '-', 1);
2999
0
    }
3000
3001
19
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3002
19
    *(result_data + whole_decimal_str_len - 3) = '.';
3003
19
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3004
19
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3005
19
    return result;
3006
19
};
3007
3008
// Note string value must be valid decimal string which contains two digits after the decimal point
3009
44
static StringRef do_money_format(FunctionContext* context, const std::string& value) {
3010
44
    bool is_positive = (value[0] != '-');
3011
44
    int32_t result_len = value.size() + (value.size() - (is_positive ? 4 : 5)) / 3;
3012
44
    StringRef result = context->create_temp_string_val(result_len);
3013
    // Modify a string passed via stringref
3014
44
    char* result_data = const_cast<char*>(result.data);
3015
44
    if (!is_positive) {
3016
9
        *result_data = '-';
3017
9
    }
3018
465
    for (int i = value.size() - 4, j = result_len - 4; i >= 0; i = i - 3) {
3019
462
        *(result_data + j) = *(value.data() + i);
3020
462
        if (i - 1 < 0) {
3021
36
            break;
3022
36
        }
3023
426
        *(result_data + j - 1) = *(value.data() + i - 1);
3024
426
        if (i - 2 < 0) {
3025
5
            break;
3026
5
        }
3027
421
        *(result_data + j - 2) = *(value.data() + i - 2);
3028
421
        if (j - 3 > 1 || (j - 3 == 1 && is_positive)) {
3029
414
            *(result_data + j - 3) = ',';
3030
414
            j -= 4;
3031
414
        } else {
3032
7
            j -= 3;
3033
7
        }
3034
421
    }
3035
44
    memcpy(result_data + result_len - 3, value.data() + value.size() - 3, 3);
3036
44
    return result;
3037
44
};
function_string.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
3009
44
// Inserts thousands separators into an already-rendered decimal string.
//
// `value` must be a valid decimal string with exactly two digits after the decimal
// point and at least one integer digit, optionally prefixed by '-' (e.g. "-1234.50").
// The result (e.g. "-1,234.50") is written into a buffer obtained from `context`.
static StringRef do_money_format(FunctionContext* context, const std::string& value) {
    const bool negative = (value[0] == '-');
    // Everything except ".dd", the sign and the left-most digit contributes one ',' per
    // three characters.
    const int32_t result_len = value.size() + (value.size() - (negative ? 5 : 4)) / 3;
    StringRef result = context->create_temp_string_val(result_len);
    // The StringRef exposes const data; we own the fresh buffer, so writing is intended.
    char* out = const_cast<char*>(result.data);

    // The ".dd" tail is copied through unchanged.
    memcpy(out + result_len - 3, value.data() + value.size() - 3, 3);
    if (negative) {
        out[0] = '-';
    }

    // Copy the integer digits right to left, emitting a ',' before every fourth digit.
    const int first_digit = negative ? 1 : 0;
    int write_at = result_len - 4;
    int digits_in_group = 0;
    for (int read_at = static_cast<int>(value.size()) - 4; read_at >= first_digit; --read_at) {
        if (digits_in_group == 3) {
            out[write_at--] = ',';
            digits_in_group = 0;
        }
        out[write_at--] = value[read_at];
        ++digits_in_group;
    }
    return result;
}
Unexecuted instantiation: function_split_by_regexp.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: pipeline_fragment_context.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: iceberg_table_sink_operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: viceberg_table_writer.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: partition_transformers.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: spill_iceberg_table_sink_operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
3038
3039
} // namespace MoneyFormat
3040
3041
namespace FormatRound {
3042
3043
0
// Scratch-buffer size used when rendering the integer part of a Decimal(9, 0).
constexpr size_t MAX_FORMAT_LEN_DEC32() {
    // At most 9 decimal digits.
    constexpr size_t max_digits = 9;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare
    // bytes. The total is doubled purely as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
3048
3049
0
// Scratch-buffer size used when rendering the integer part of a Decimal(18, 0).
constexpr size_t MAX_FORMAT_LEN_DEC64() {
    // At most 18 decimal digits.
    constexpr size_t max_digits = 18;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare
    // bytes. The total is doubled purely as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
3054
3055
0
// Scratch-buffer size used when rendering the integer part of a DecimalV2 value.
constexpr size_t MAX_FORMAT_LEN_DEC128V2() {
    // DecimalV2 is budgeted at 27 decimal digits.
    constexpr size_t max_digits = 27;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare
    // bytes. The total is doubled purely as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
3060
3061
0
// Scratch-buffer size used when rendering the integer part of a Decimal(38, 0).
constexpr size_t MAX_FORMAT_LEN_DEC128V3() {
    // Decimal(38, 0) has 38 digits; 39 are budgeted, which matches the INT128 bound.
    constexpr size_t max_digits = 39;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare
    // bytes. The total is doubled purely as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
3066
3067
0
// Scratch-buffer size used when rendering a 64-bit integer.
constexpr size_t MAX_FORMAT_LEN_INT64() {
    // INT64_MIN = -9223372036854775808 has 19 digits; 20 are budgeted here.
    constexpr size_t max_digits = 20;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare
    // bytes. The total is doubled purely as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
3072
3073
0
// Scratch-buffer size used when rendering a 128-bit integer.
constexpr size_t MAX_FORMAT_LEN_INT128() {
    // INT128_MIN = -170141183460469231731687303715884105728 has 39 digits.
    constexpr size_t max_digits = 39;
    // 1 byte for the sign, the digits, one ',' budgeted per three digits and 3 spare
    // bytes. The total is doubled as a safety margin.
    constexpr size_t exact_len = 1 + max_digits + (max_digits / 3) + 3;
    return 2 * exact_len;
}
3077
3078
template <typename T, size_t N>
3079
StringRef do_format_round(FunctionContext* context, UInt32 scale, T int_value, T frac_value,
3080
79
                          Int32 decimal_places) {
3081
79
    static_assert(std::is_integral<T>::value);
3082
79
    const bool is_negative = int_value < 0 || frac_value < 0;
3083
3084
    // do round to frac_part based on decimal_places
3085
79
    if (scale > decimal_places && decimal_places > 0) {
3086
20
        DCHECK(scale <= 38);
3087
        // do rounding, so we need to reserve decimal_places + 1 digits
3088
20
        auto multiplier =
3089
20
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3090
        // do divide first to avoid overflow
3091
        // after round frac_value will be positive by design
3092
20
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3093
20
        frac_value /= 10;
3094
59
    } else if (scale < decimal_places && decimal_places > 0) {
3095
        // since scale <= decimal_places, overflow is impossible
3096
39
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3097
39
    }
3098
3099
    // Calculate power of 10 for decimal_places
3100
79
    T decimal_power = common::exp10_i32(decimal_places);
3101
79
    if (frac_value == decimal_power) {
3102
0
        if (is_negative) {
3103
0
            int_value -= 1;
3104
0
        } else {
3105
0
            int_value += 1;
3106
0
        }
3107
0
        frac_value = 0;
3108
0
    }
3109
3110
79
    bool append_sign_manually = false;
3111
79
    if (is_negative && int_value == 0) {
3112
0
        append_sign_manually = true;
3113
0
    }
3114
3115
79
    char local[N];
3116
79
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3117
79
    const Int32 integer_str_len = N - (p - local);
3118
79
    const Int32 frac_str_len = decimal_places;
3119
79
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3120
79
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3121
3122
79
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3123
    // Modify a string passed via stringref
3124
79
    char* result_data = const_cast<char*>(result.data);
3125
3126
79
    if (append_sign_manually) {
3127
0
        memset(result_data, '-', 1);
3128
0
    }
3129
3130
79
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3131
79
    if (decimal_places > 0) {
3132
64
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3133
64
    }
3134
3135
    // Convert fractional part to string with proper padding
3136
79
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3137
2.34k
    for (int i = 0; i <= decimal_places - 1; ++i) {
3138
2.27k
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3139
2.27k
        remaining_frac /= 10;
3140
2.27k
    }
3141
79
    return result;
3142
79
}
_ZN5doris11FormatRound15do_format_roundIlLm60EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3080
16
                          Int32 decimal_places) {
3081
16
    static_assert(std::is_integral<T>::value);
3082
16
    const bool is_negative = int_value < 0 || frac_value < 0;
3083
3084
    // do round to frac_part based on decimal_places
3085
16
    if (scale > decimal_places && decimal_places > 0) {
3086
0
        DCHECK(scale <= 38);
3087
        // do rounding, so we need to reserve decimal_places + 1 digits
3088
0
        auto multiplier =
3089
0
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3090
        // do divide first to avoid overflow
3091
        // after round frac_value will be positive by design
3092
0
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3093
0
        frac_value /= 10;
3094
16
    } else if (scale < decimal_places && decimal_places > 0) {
3095
        // since scale <= decimal_places, overflow is impossible
3096
12
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3097
12
    }
3098
3099
    // Calculate power of 10 for decimal_places
3100
16
    T decimal_power = common::exp10_i32(decimal_places);
3101
16
    if (frac_value == decimal_power) {
3102
0
        if (is_negative) {
3103
0
            int_value -= 1;
3104
0
        } else {
3105
0
            int_value += 1;
3106
0
        }
3107
0
        frac_value = 0;
3108
0
    }
3109
3110
16
    bool append_sign_manually = false;
3111
16
    if (is_negative && int_value == 0) {
3112
0
        append_sign_manually = true;
3113
0
    }
3114
3115
16
    char local[N];
3116
16
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3117
16
    const Int32 integer_str_len = N - (p - local);
3118
16
    const Int32 frac_str_len = decimal_places;
3119
16
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3120
16
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3121
3122
16
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3123
    // Modify a string passed via stringref
3124
16
    char* result_data = const_cast<char*>(result.data);
3125
3126
16
    if (append_sign_manually) {
3127
0
        memset(result_data, '-', 1);
3128
0
    }
3129
3130
16
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3131
16
    if (decimal_places > 0) {
3132
12
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3133
12
    }
3134
3135
    // Convert fractional part to string with proper padding
3136
16
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3137
92
    for (int i = 0; i <= decimal_places - 1; ++i) {
3138
76
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3139
76
        remaining_frac /= 10;
3140
76
    }
3141
16
    return result;
3142
16
}
_ZN5doris11FormatRound15do_format_roundInLm112EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3080
15
                          Int32 decimal_places) {
3081
15
    static_assert(std::is_integral<T>::value);
3082
15
    const bool is_negative = int_value < 0 || frac_value < 0;
3083
3084
    // do round to frac_part based on decimal_places
3085
15
    if (scale > decimal_places && decimal_places > 0) {
3086
2
        DCHECK(scale <= 38);
3087
        // do rounding, so we need to reserve decimal_places + 1 digits
3088
2
        auto multiplier =
3089
2
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3090
        // do divide first to avoid overflow
3091
        // after round frac_value will be positive by design
3092
2
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3093
2
        frac_value /= 10;
3094
13
    } else if (scale < decimal_places && decimal_places > 0) {
3095
        // since scale <= decimal_places, overflow is impossible
3096
10
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3097
10
    }
3098
3099
    // Calculate power of 10 for decimal_places
3100
15
    T decimal_power = common::exp10_i32(decimal_places);
3101
15
    if (frac_value == decimal_power) {
3102
0
        if (is_negative) {
3103
0
            int_value -= 1;
3104
0
        } else {
3105
0
            int_value += 1;
3106
0
        }
3107
0
        frac_value = 0;
3108
0
    }
3109
3110
15
    bool append_sign_manually = false;
3111
15
    if (is_negative && int_value == 0) {
3112
0
        append_sign_manually = true;
3113
0
    }
3114
3115
15
    char local[N];
3116
15
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3117
15
    const Int32 integer_str_len = N - (p - local);
3118
15
    const Int32 frac_str_len = decimal_places;
3119
15
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3120
15
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3121
3122
15
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3123
    // Modify a string passed via stringref
3124
15
    char* result_data = const_cast<char*>(result.data);
3125
3126
15
    if (append_sign_manually) {
3127
0
        memset(result_data, '-', 1);
3128
0
    }
3129
3130
15
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3131
15
    if (decimal_places > 0) {
3132
13
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3133
13
    }
3134
3135
    // Convert fractional part to string with proper padding
3136
15
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3137
94
    for (int i = 0; i <= decimal_places - 1; ++i) {
3138
79
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3139
79
        remaining_frac /= 10;
3140
79
    }
3141
15
    return result;
3142
15
}
_ZN5doris11FormatRound15do_format_roundInLm80EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3080
3
                          Int32 decimal_places) {
3081
3
    static_assert(std::is_integral<T>::value);
3082
3
    const bool is_negative = int_value < 0 || frac_value < 0;
3083
3084
    // do round to frac_part based on decimal_places
3085
3
    if (scale > decimal_places && decimal_places > 0) {
3086
3
        DCHECK(scale <= 38);
3087
        // do rounding, so we need to reserve decimal_places + 1 digits
3088
3
        auto multiplier =
3089
3
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3090
        // do divide first to avoid overflow
3091
        // after round frac_value will be positive by design
3092
3
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3093
3
        frac_value /= 10;
3094
3
    } else if (scale < decimal_places && decimal_places > 0) {
3095
        // since scale <= decimal_places, overflow is impossible
3096
0
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3097
0
    }
3098
3099
    // Calculate power of 10 for decimal_places
3100
3
    T decimal_power = common::exp10_i32(decimal_places);
3101
3
    if (frac_value == decimal_power) {
3102
0
        if (is_negative) {
3103
0
            int_value -= 1;
3104
0
        } else {
3105
0
            int_value += 1;
3106
0
        }
3107
0
        frac_value = 0;
3108
0
    }
3109
3110
3
    bool append_sign_manually = false;
3111
3
    if (is_negative && int_value == 0) {
3112
0
        append_sign_manually = true;
3113
0
    }
3114
3115
3
    char local[N];
3116
3
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3117
3
    const Int32 integer_str_len = N - (p - local);
3118
3
    const Int32 frac_str_len = decimal_places;
3119
3
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3120
3
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3121
3122
3
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3123
    // Modify a string passed via stringref
3124
3
    char* result_data = const_cast<char*>(result.data);
3125
3126
3
    if (append_sign_manually) {
3127
0
        memset(result_data, '-', 1);
3128
0
    }
3129
3130
3
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3131
3
    if (decimal_places > 0) {
3132
3
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3133
3
    }
3134
3135
    // Convert fractional part to string with proper padding
3136
3
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3137
9
    for (int i = 0; i <= decimal_places - 1; ++i) {
3138
6
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3139
6
        remaining_frac /= 10;
3140
6
    }
3141
3
    return result;
3142
3
}
_ZN5doris11FormatRound15do_format_roundIlLm32EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3080
27
                          Int32 decimal_places) {
3081
27
    static_assert(std::is_integral<T>::value);
3082
27
    const bool is_negative = int_value < 0 || frac_value < 0;
3083
3084
    // do round to frac_part based on decimal_places
3085
27
    if (scale > decimal_places && decimal_places > 0) {
3086
3
        DCHECK(scale <= 38);
3087
        // do rounding, so we need to reserve decimal_places + 1 digits
3088
3
        auto multiplier =
3089
3
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3090
        // do divide first to avoid overflow
3091
        // after round frac_value will be positive by design
3092
3
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3093
3
        frac_value /= 10;
3094
24
    } else if (scale < decimal_places && decimal_places > 0) {
3095
        // since scale <= decimal_places, overflow is impossible
3096
15
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3097
15
    }
3098
3099
    // Calculate power of 10 for decimal_places
3100
27
    T decimal_power = common::exp10_i32(decimal_places);
3101
27
    if (frac_value == decimal_power) {
3102
0
        if (is_negative) {
3103
0
            int_value -= 1;
3104
0
        } else {
3105
0
            int_value += 1;
3106
0
        }
3107
0
        frac_value = 0;
3108
0
    }
3109
3110
27
    bool append_sign_manually = false;
3111
27
    if (is_negative && int_value == 0) {
3112
0
        append_sign_manually = true;
3113
0
    }
3114
3115
27
    char local[N];
3116
27
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3117
27
    const Int32 integer_str_len = N - (p - local);
3118
27
    const Int32 frac_str_len = decimal_places;
3119
27
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3120
27
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3121
3122
27
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3123
    // Modify a string passed via stringref
3124
27
    char* result_data = const_cast<char*>(result.data);
3125
3126
27
    if (append_sign_manually) {
3127
0
        memset(result_data, '-', 1);
3128
0
    }
3129
3130
27
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3131
27
    if (decimal_places > 0) {
3132
19
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3133
19
    }
3134
3135
    // Convert fractional part to string with proper padding
3136
27
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3137
2.07k
    for (int i = 0; i <= decimal_places - 1; ++i) {
3138
2.04k
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3139
2.04k
        remaining_frac /= 10;
3140
2.04k
    }
3141
27
    return result;
3142
27
}
_ZN5doris11FormatRound15do_format_roundIlLm56EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3080
18
                          Int32 decimal_places) {
3081
18
    static_assert(std::is_integral<T>::value);
3082
18
    const bool is_negative = int_value < 0 || frac_value < 0;
3083
3084
    // do round to frac_part based on decimal_places
3085
18
    if (scale > decimal_places && decimal_places > 0) {
3086
12
        DCHECK(scale <= 38);
3087
        // do rounding, so we need to reserve decimal_places + 1 digits
3088
12
        auto multiplier =
3089
12
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3090
        // do divide first to avoid overflow
3091
        // after round frac_value will be positive by design
3092
12
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3093
12
        frac_value /= 10;
3094
12
    } else if (scale < decimal_places && decimal_places > 0) {
3095
        // since scale <= decimal_places, overflow is impossible
3096
2
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3097
2
    }
3098
3099
    // Calculate power of 10 for decimal_places
3100
18
    T decimal_power = common::exp10_i32(decimal_places);
3101
18
    if (frac_value == decimal_power) {
3102
0
        if (is_negative) {
3103
0
            int_value -= 1;
3104
0
        } else {
3105
0
            int_value += 1;
3106
0
        }
3107
0
        frac_value = 0;
3108
0
    }
3109
3110
18
    bool append_sign_manually = false;
3111
18
    if (is_negative && int_value == 0) {
3112
0
        append_sign_manually = true;
3113
0
    }
3114
3115
18
    char local[N];
3116
18
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3117
18
    const Int32 integer_str_len = N - (p - local);
3118
18
    const Int32 frac_str_len = decimal_places;
3119
18
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3120
18
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3121
3122
18
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3123
    // Modify a string passed via stringref
3124
18
    char* result_data = const_cast<char*>(result.data);
3125
3126
18
    if (append_sign_manually) {
3127
0
        memset(result_data, '-', 1);
3128
0
    }
3129
3130
18
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3131
18
    if (decimal_places > 0) {
3132
17
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3133
17
    }
3134
3135
    // Convert fractional part to string with proper padding
3136
18
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3137
78
    for (int i = 0; i <= decimal_places - 1; ++i) {
3138
60
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3139
60
        remaining_frac /= 10;
3140
60
    }
3141
18
    return result;
3142
18
}
3143
3144
} // namespace FormatRound
3145
3146
struct MoneyFormatDoubleImpl {
3147
8
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeFloat64>()}; }
3148
3149
    static void execute(FunctionContext* context, ColumnString* result_column,
3150
24
                        const ColumnPtr col_ptr, size_t input_rows_count) {
3151
24
        const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get());
3152
        // when scale is above 38, we will go here
3153
68
        for (size_t i = 0; i < input_rows_count; i++) {
3154
            // round to 2 decimal places
3155
44
            double value =
3156
44
                    MathFunctions::my_double_round(data_column->get_element(i), 2, false, false);
3157
44
            StringRef str = MoneyFormat::do_money_format(context, fmt::format("{:.2f}", value));
3158
44
            result_column->insert_data(str.data, str.size);
3159
44
        }
3160
24
    }
3161
};
3162
3163
struct MoneyFormatInt64Impl {
3164
8
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt64>()}; }
3165
3166
    static void execute(FunctionContext* context, ColumnString* result_column,
3167
33
                        const ColumnPtr col_ptr, size_t input_rows_count) {
3168
33
        const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get());
3169
85
        for (size_t i = 0; i < input_rows_count; i++) {
3170
52
            Int64 value = data_column->get_element(i);
3171
52
            StringRef str =
3172
52
                    MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_INT64()>(
3173
52
                            context, 0, value, 0);
3174
52
            result_column->insert_data(str.data, str.size);
3175
52
        }
3176
33
    }
3177
};
3178
3179
struct MoneyFormatInt128Impl {
3180
8
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt128>()}; }
3181
3182
    static void execute(FunctionContext* context, ColumnString* result_column,
3183
11
                        const ColumnPtr col_ptr, size_t input_rows_count) {
3184
11
        const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get());
3185
        // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will
3186
        // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris,
3187
        // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124
3188
41
        for (size_t i = 0; i < input_rows_count; i++) {
3189
30
            Int128 value = data_column->get_element(i);
3190
30
            StringRef str =
3191
30
                    MoneyFormat::do_money_format<Int128, MoneyFormat::MAX_FORMAT_LEN_INT128()>(
3192
30
                            context, 0, value, 0);
3193
30
            result_column->insert_data(str.data, str.size);
3194
30
        }
3195
11
    }
3196
};
3197
3198
template <PrimitiveType Type>
3199
struct MoneyFormatDecimalImpl {
3200
40
    static DataTypes get_variadic_argument_types() {
3201
40
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
40
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EE27get_variadic_argument_typesEv
Line
Count
Source
3200
8
    static DataTypes get_variadic_argument_types() {
3201
8
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
8
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EE27get_variadic_argument_typesEv
Line
Count
Source
3200
8
    static DataTypes get_variadic_argument_types() {
3201
8
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
8
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EE27get_variadic_argument_typesEv
Line
Count
Source
3200
8
    static DataTypes get_variadic_argument_types() {
3201
8
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
8
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EE27get_variadic_argument_typesEv
Line
Count
Source
3200
8
    static DataTypes get_variadic_argument_types() {
3201
8
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
8
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EE27get_variadic_argument_typesEv
Line
Count
Source
3200
8
    static DataTypes get_variadic_argument_types() {
3201
8
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
8
    }
3203
3204
    static void execute(FunctionContext* context, ColumnString* result_column, ColumnPtr col_ptr,
3205
87
                        size_t input_rows_count) {
3206
87
        if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3207
16
            for (size_t i = 0; i < input_rows_count; i++) {
3208
14
                const auto& value = decimalv2_column->get_element(i);
3209
                // unified_frac_value has 3 digits
3210
14
                auto unified_frac_value = value.frac_value() / 1000000;
3211
14
                StringRef str =
3212
14
                        MoneyFormat::do_money_format<Int128,
3213
14
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>(
3214
14
                                context, 3, value.int_value(), unified_frac_value);
3215
3216
14
                result_column->insert_data(str.data, str.size);
3217
14
            }
3218
85
        } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3219
58
            const UInt32 scale = decimal32_column->get_scale();
3220
133
            for (size_t i = 0; i < input_rows_count; i++) {
3221
75
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3222
75
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3223
75
                StringRef str =
3224
75
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>(
3225
75
                                context, scale, static_cast<Int64>(whole_part),
3226
75
                                static_cast<Int64>(frac_part));
3227
3228
75
                result_column->insert_data(str.data, str.size);
3229
75
            }
3230
58
        } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3231
17
            const UInt32 scale = decimal64_column->get_scale();
3232
36
            for (size_t i = 0; i < input_rows_count; i++) {
3233
19
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3234
19
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3235
3236
19
                StringRef str =
3237
19
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>(
3238
19
                                context, scale, whole_part, frac_part);
3239
3240
19
                result_column->insert_data(str.data, str.size);
3241
19
            }
3242
17
        } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3243
10
            const UInt32 scale = decimal128_column->get_scale();
3244
20
            for (size_t i = 0; i < input_rows_count; i++) {
3245
10
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3246
10
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3247
3248
10
                StringRef str =
3249
10
                        MoneyFormat::do_money_format<Int128,
3250
10
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>(
3251
10
                                context, scale, whole_part, frac_part);
3252
3253
10
                result_column->insert_data(str.data, str.size);
3254
10
            }
3255
10
        } else {
3256
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
3257
0
                                   "Not supported input argument type {}", col_ptr->get_name());
3258
0
        }
3259
        // TODO: decimal256
3260
        /* else if (auto* decimal256_column =
3261
                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
3262
            const UInt32 scale = decimal256_column->get_scale();
3263
            const auto multiplier =
3264
                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
3265
            for (size_t i = 0; i < input_rows_count; i++) {
3266
                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
3267
                if (scale > 2) {
3268
                    int delta = ((frac_part % multiplier) << 1) > multiplier;
3269
                    frac_part = Decimal256(frac_part / multiplier + delta);
3270
                } else if (scale < 2) {
3271
                    frac_part = Decimal256(frac_part * multiplier);
3272
                }
3273
3274
                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
3275
                        context, decimal256_column->get_intergral_part(i), frac_part);
3276
3277
                result_column->insert_data(str.data, str.size);
3278
            }
3279
        }*/
3280
87
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
Line
Count
Source
3205
2
                        size_t input_rows_count) {
3206
2
        if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3207
16
            for (size_t i = 0; i < input_rows_count; i++) {
3208
14
                const auto& value = decimalv2_column->get_element(i);
3209
                // unified_frac_value has 3 digits
3210
14
                auto unified_frac_value = value.frac_value() / 1000000;
3211
14
                StringRef str =
3212
14
                        MoneyFormat::do_money_format<Int128,
3213
14
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>(
3214
14
                                context, 3, value.int_value(), unified_frac_value);
3215
3216
14
                result_column->insert_data(str.data, str.size);
3217
14
            }
3218
2
        } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3219
0
            const UInt32 scale = decimal32_column->get_scale();
3220
0
            for (size_t i = 0; i < input_rows_count; i++) {
3221
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3222
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3223
0
                StringRef str =
3224
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>(
3225
0
                                context, scale, static_cast<Int64>(whole_part),
3226
0
                                static_cast<Int64>(frac_part));
3227
3228
0
                result_column->insert_data(str.data, str.size);
3229
0
            }
3230
0
        } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3231
0
            const UInt32 scale = decimal64_column->get_scale();
3232
0
            for (size_t i = 0; i < input_rows_count; i++) {
3233
0
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3234
0
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3235
3236
0
                StringRef str =
3237
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>(
3238
0
                                context, scale, whole_part, frac_part);
3239
3240
0
                result_column->insert_data(str.data, str.size);
3241
0
            }
3242
0
        } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3243
0
            const UInt32 scale = decimal128_column->get_scale();
3244
0
            for (size_t i = 0; i < input_rows_count; i++) {
3245
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3246
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3247
3248
0
                StringRef str =
3249
0
                        MoneyFormat::do_money_format<Int128,
3250
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>(
3251
0
                                context, scale, whole_part, frac_part);
3252
3253
0
                result_column->insert_data(str.data, str.size);
3254
0
            }
3255
0
        } else {
3256
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
3257
0
                                   "Not supported input argument type {}", col_ptr->get_name());
3258
0
        }
3259
        // TODO: decimal256
3260
        /* else if (auto* decimal256_column =
3261
                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
3262
            const UInt32 scale = decimal256_column->get_scale();
3263
            const auto multiplier =
3264
                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
3265
            for (size_t i = 0; i < input_rows_count; i++) {
3266
                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
3267
                if (scale > 2) {
3268
                    int delta = ((frac_part % multiplier) << 1) > multiplier;
3269
                    frac_part = Decimal256(frac_part / multiplier + delta);
3270
                } else if (scale < 2) {
3271
                    frac_part = Decimal256(frac_part * multiplier);
3272
                }
3273
3274
                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
3275
                        context, decimal256_column->get_intergral_part(i), frac_part);
3276
3277
                result_column->insert_data(str.data, str.size);
3278
            }
3279
        }*/
3280
2
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
Line
Count
Source
3205
58
                        size_t input_rows_count) {
3206
58
        if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3207
0
            for (size_t i = 0; i < input_rows_count; i++) {
3208
0
                const auto& value = decimalv2_column->get_element(i);
3209
                // unified_frac_value has 3 digits
3210
0
                auto unified_frac_value = value.frac_value() / 1000000;
3211
0
                StringRef str =
3212
0
                        MoneyFormat::do_money_format<Int128,
3213
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>(
3214
0
                                context, 3, value.int_value(), unified_frac_value);
3215
3216
0
                result_column->insert_data(str.data, str.size);
3217
0
            }
3218
58
        } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3219
58
            const UInt32 scale = decimal32_column->get_scale();
3220
133
            for (size_t i = 0; i < input_rows_count; i++) {
3221
75
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3222
75
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3223
75
                StringRef str =
3224
75
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>(
3225
75
                                context, scale, static_cast<Int64>(whole_part),
3226
75
                                static_cast<Int64>(frac_part));
3227
3228
75
                result_column->insert_data(str.data, str.size);
3229
75
            }
3230
58
        } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3231
0
            const UInt32 scale = decimal64_column->get_scale();
3232
0
            for (size_t i = 0; i < input_rows_count; i++) {
3233
0
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3234
0
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3235
3236
0
                StringRef str =
3237
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>(
3238
0
                                context, scale, whole_part, frac_part);
3239
3240
0
                result_column->insert_data(str.data, str.size);
3241
0
            }
3242
0
        } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3243
0
            const UInt32 scale = decimal128_column->get_scale();
3244
0
            for (size_t i = 0; i < input_rows_count; i++) {
3245
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3246
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3247
3248
0
                StringRef str =
3249
0
                        MoneyFormat::do_money_format<Int128,
3250
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>(
3251
0
                                context, scale, whole_part, frac_part);
3252
3253
0
                result_column->insert_data(str.data, str.size);
3254
0
            }
3255
0
        } else {
3256
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
3257
0
                                   "Not supported input argument type {}", col_ptr->get_name());
3258
0
        }
3259
        // TODO: decimal256
3260
        /* else if (auto* decimal256_column =
3261
                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
3262
            const UInt32 scale = decimal256_column->get_scale();
3263
            const auto multiplier =
3264
                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
3265
            for (size_t i = 0; i < input_rows_count; i++) {
3266
                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
3267
                if (scale > 2) {
3268
                    int delta = ((frac_part % multiplier) << 1) > multiplier;
3269
                    frac_part = Decimal256(frac_part / multiplier + delta);
3270
                } else if (scale < 2) {
3271
                    frac_part = Decimal256(frac_part * multiplier);
3272
                }
3273
3274
                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
3275
                        context, decimal256_column->get_intergral_part(i), frac_part);
3276
3277
                result_column->insert_data(str.data, str.size);
3278
            }
3279
        }*/
3280
58
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
Line
Count
Source
3205
17
                        size_t input_rows_count) {
3206
17
        if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3207
0
            for (size_t i = 0; i < input_rows_count; i++) {
3208
0
                const auto& value = decimalv2_column->get_element(i);
3209
                // unified_frac_value has 3 digits
3210
0
                auto unified_frac_value = value.frac_value() / 1000000;
3211
0
                StringRef str =
3212
0
                        MoneyFormat::do_money_format<Int128,
3213
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>(
3214
0
                                context, 3, value.int_value(), unified_frac_value);
3215
3216
0
                result_column->insert_data(str.data, str.size);
3217
0
            }
3218
17
        } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3219
0
            const UInt32 scale = decimal32_column->get_scale();
3220
0
            for (size_t i = 0; i < input_rows_count; i++) {
3221
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3222
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3223
0
                StringRef str =
3224
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>(
3225
0
                                context, scale, static_cast<Int64>(whole_part),
3226
0
                                static_cast<Int64>(frac_part));
3227
3228
0
                result_column->insert_data(str.data, str.size);
3229
0
            }
3230
17
        } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3231
17
            const UInt32 scale = decimal64_column->get_scale();
3232
36
            for (size_t i = 0; i < input_rows_count; i++) {
3233
19
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3234
19
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3235
3236
19
                StringRef str =
3237
19
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>(
3238
19
                                context, scale, whole_part, frac_part);
3239
3240
19
                result_column->insert_data(str.data, str.size);
3241
19
            }
3242
17
        } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3243
0
            const UInt32 scale = decimal128_column->get_scale();
3244
0
            for (size_t i = 0; i < input_rows_count; i++) {
3245
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3246
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3247
3248
0
                StringRef str =
3249
0
                        MoneyFormat::do_money_format<Int128,
3250
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>(
3251
0
                                context, scale, whole_part, frac_part);
3252
3253
0
                result_column->insert_data(str.data, str.size);
3254
0
            }
3255
0
        } else {
3256
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
3257
0
                                   "Not supported input argument type {}", col_ptr->get_name());
3258
0
        }
3259
        // TODO: decimal256
3260
        /* else if (auto* decimal256_column =
3261
                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
3262
            const UInt32 scale = decimal256_column->get_scale();
3263
            const auto multiplier =
3264
                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
3265
            for (size_t i = 0; i < input_rows_count; i++) {
3266
                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
3267
                if (scale > 2) {
3268
                    int delta = ((frac_part % multiplier) << 1) > multiplier;
3269
                    frac_part = Decimal256(frac_part / multiplier + delta);
3270
                } else if (scale < 2) {
3271
                    frac_part = Decimal256(frac_part * multiplier);
3272
                }
3273
3274
                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
3275
                        context, decimal256_column->get_intergral_part(i), frac_part);
3276
3277
                result_column->insert_data(str.data, str.size);
3278
            }
3279
        }*/
3280
17
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
Line
Count
Source
3205
10
                        size_t input_rows_count) {
3206
10
        if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3207
0
            for (size_t i = 0; i < input_rows_count; i++) {
3208
0
                const auto& value = decimalv2_column->get_element(i);
3209
                // unified_frac_value has 3 digits
3210
0
                auto unified_frac_value = value.frac_value() / 1000000;
3211
0
                StringRef str =
3212
0
                        MoneyFormat::do_money_format<Int128,
3213
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>(
3214
0
                                context, 3, value.int_value(), unified_frac_value);
3215
3216
0
                result_column->insert_data(str.data, str.size);
3217
0
            }
3218
10
        } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3219
0
            const UInt32 scale = decimal32_column->get_scale();
3220
0
            for (size_t i = 0; i < input_rows_count; i++) {
3221
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3222
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3223
0
                StringRef str =
3224
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>(
3225
0
                                context, scale, static_cast<Int64>(whole_part),
3226
0
                                static_cast<Int64>(frac_part));
3227
3228
0
                result_column->insert_data(str.data, str.size);
3229
0
            }
3230
10
        } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3231
0
            const UInt32 scale = decimal64_column->get_scale();
3232
0
            for (size_t i = 0; i < input_rows_count; i++) {
3233
0
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3234
0
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3235
3236
0
                StringRef str =
3237
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>(
3238
0
                                context, scale, whole_part, frac_part);
3239
3240
0
                result_column->insert_data(str.data, str.size);
3241
0
            }
3242
10
        } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3243
10
            const UInt32 scale = decimal128_column->get_scale();
3244
20
            for (size_t i = 0; i < input_rows_count; i++) {
3245
10
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3246
10
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3247
3248
10
                StringRef str =
3249
10
                        MoneyFormat::do_money_format<Int128,
3250
10
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>(
3251
10
                                context, scale, whole_part, frac_part);
3252
3253
10
                result_column->insert_data(str.data, str.size);
3254
10
            }
3255
10
        } else {
3256
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
3257
0
                                   "Not supported input argument type {}", col_ptr->get_name());
3258
0
        }
3259
        // TODO: decimal256
3260
        /* else if (auto* decimal256_column =
3261
                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
3262
            const UInt32 scale = decimal256_column->get_scale();
3263
            const auto multiplier =
3264
                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
3265
            for (size_t i = 0; i < input_rows_count; i++) {
3266
                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
3267
                if (scale > 2) {
3268
                    int delta = ((frac_part % multiplier) << 1) > multiplier;
3269
                    frac_part = Decimal256(frac_part / multiplier + delta);
3270
                } else if (scale < 2) {
3271
                    frac_part = Decimal256(frac_part * multiplier);
3272
                }
3273
3274
                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
3275
                        context, decimal256_column->get_intergral_part(i), frac_part);
3276
3277
                result_column->insert_data(str.data, str.size);
3278
            }
3279
        }*/
3280
10
    }
Unexecuted instantiation: _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
3281
};
3282
3283
struct FormatRoundDoubleImpl {
3284
8
    // Argument types of this overload, in call order: a Float64 value followed by
    // an Int32 (read by execute() as the number of decimal places).
    static DataTypes get_variadic_argument_types() {
        auto value_type = std::make_shared<DataTypeFloat64>();
        auto decimal_places_type = std::make_shared<DataTypeInt32>();
        return {std::move(value_type), std::move(decimal_places_type)};
    }
3287
3288
36
    // Inserts ',' between every group of three integer digits of a fixed-point
    // decimal string, e.g. "-1234567.89" -> "-1,234,567.89".
    //
    // A single leading '-' stays in front of the first digit group, and everything
    // from the first '.' onwards is copied through unchanged. Strings with at most
    // three integer digits (including the empty string) are returned as-is.
    //
    // formatted_num: text to decorate; the caller only passes finite numbers
    //                rendered with the "f" presentation.
    // Returns a new string; the input is never modified.
    static std::string add_thousands_separator(const std::string& formatted_num) {
        // The integer digits end at the decimal point, or at the end of the string
        // when there is no fractional part (find() yields npos, the largest size_t).
        const size_t dot_pos = std::min(formatted_num.find('.'), formatted_num.size());

        // Skip the sign, if any. Whenever start == 1 the first character is '-',
        // so dot_pos >= 1 and the subtraction below cannot wrap around.
        const size_t start = (!formatted_num.empty() && formatted_num[0] == '-') ? 1 : 0;
        const size_t digit_count = dot_pos - start;

        // There is no need to add commas.
        if (digit_count <= 3) {
            return formatted_num;
        }

        std::string result;
        // Exactly one comma is added per complete group of three digits that follows
        // the leading group, so the final length is known up front.
        result.reserve(formatted_num.size() + (digit_count - 1) / 3);

        // The leading group holds the 1..3 digits left over once the remaining
        // digits are split into groups of three. Copy it together with the sign.
        const size_t remainder = digit_count % 3;
        const size_t first_group = (remainder == 0) ? 3 : remainder;
        result.append(formatted_num, 0, start + first_group);

        for (size_t pos = start + first_group; pos < dot_pos; pos += 3) {
            result += ',';
            result.append(formatted_num, pos, 3);
        }

        // Add the decimal part (keep as it is); appends nothing when there is none.
        result.append(formatted_num, dot_pos, std::string::npos);
        return result;
    }
3325
3326
    template <bool is_const>
3327
    static Status execute(FunctionContext* context, ColumnString* result_column,
3328
                          const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr,
3329
33
                          size_t input_rows_count) {
3330
33
        const auto& arg_column_data_2 =
3331
33
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3332
33
        const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get());
3333
        // when scale is above 38, we will go here
3334
69
        for (size_t i = 0; i < input_rows_count; i++) {
3335
36
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3336
36
            if (decimal_places < 0 || decimal_places > 1024) {
3337
0
                return Status::InvalidArgument(
3338
0
                        "The second argument is {}, it should be in range [0, 1024].",
3339
0
                        decimal_places);
3340
0
            }
3341
            // round to `decimal_places` decimal places
3342
36
            double value = MathFunctions::my_double_round(data_column->get_element(i),
3343
36
                                                          decimal_places, false, false);
3344
36
            std::string formatted_value = fmt::format("{:.{}f}", value, decimal_places);
3345
36
            if (std::isfinite(value)) {
3346
36
                result_column->insert_value(add_thousands_separator(formatted_value));
3347
36
            } else {
3348
                // if value is not finite, we just insert the original formatted value
3349
                // e.g. "inf", "-inf", "nan"
3350
0
                result_column->insert_value(formatted_value);
3351
0
            }
3352
36
        }
3353
33
        return Status::OK();
3354
33
    }
_ZN5doris21FormatRoundDoubleImpl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3329
2
                          size_t input_rows_count) {
3330
2
        const auto& arg_column_data_2 =
3331
2
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3332
2
        const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get());
3333
        // when scale is above 38, we will go here
3334
4
        for (size_t i = 0; i < input_rows_count; i++) {
3335
2
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3336
2
            if (decimal_places < 0 || decimal_places > 1024) {
3337
0
                return Status::InvalidArgument(
3338
0
                        "The second argument is {}, it should be in range [0, 1024].",
3339
0
                        decimal_places);
3340
0
            }
3341
            // round to `decimal_places` decimal places
3342
2
            double value = MathFunctions::my_double_round(data_column->get_element(i),
3343
2
                                                          decimal_places, false, false);
3344
2
            std::string formatted_value = fmt::format("{:.{}f}", value, decimal_places);
3345
2
            if (std::isfinite(value)) {
3346
2
                result_column->insert_value(add_thousands_separator(formatted_value));
3347
2
            } else {
3348
                // if value is not finite, we just insert the original formatted value
3349
                // e.g. "inf", "-inf", "nan"
3350
0
                result_column->insert_value(formatted_value);
3351
0
            }
3352
2
        }
3353
2
        return Status::OK();
3354
2
    }
_ZN5doris21FormatRoundDoubleImpl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3329
31
                          size_t input_rows_count) {
3330
31
        const auto& arg_column_data_2 =
3331
31
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3332
31
        const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get());
3333
        // when scale is above 38, we will go here
3334
65
        for (size_t i = 0; i < input_rows_count; i++) {
3335
34
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3336
34
            if (decimal_places < 0 || decimal_places > 1024) {
3337
0
                return Status::InvalidArgument(
3338
0
                        "The second argument is {}, it should be in range [0, 1024].",
3339
0
                        decimal_places);
3340
0
            }
3341
            // round to `decimal_places` decimal places
3342
34
            double value = MathFunctions::my_double_round(data_column->get_element(i),
3343
34
                                                          decimal_places, false, false);
3344
34
            std::string formatted_value = fmt::format("{:.{}f}", value, decimal_places);
3345
34
            if (std::isfinite(value)) {
3346
34
                result_column->insert_value(add_thousands_separator(formatted_value));
3347
34
            } else {
3348
                // if value is not finite, we just insert the original formatted value
3349
                // e.g. "inf", "-inf", "nan"
3350
0
                result_column->insert_value(formatted_value);
3351
0
            }
3352
34
        }
3353
31
        return Status::OK();
3354
31
    }
3355
};
3356
3357
struct FormatRoundInt64Impl {
3358
8
    // Argument types of this overload, in call order: an Int64 value followed by
    // an Int32 (read by execute() as the number of decimal places).
    static DataTypes get_variadic_argument_types() {
        auto value_type = std::make_shared<DataTypeInt64>();
        auto decimal_places_type = std::make_shared<DataTypeInt32>();
        return {std::move(value_type), std::move(decimal_places_type)};
    }
3361
3362
    template <bool is_const>
3363
    static Status execute(FunctionContext* context, ColumnString* result_column,
3364
                          const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr,
3365
9
                          size_t input_rows_count) {
3366
9
        const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get());
3367
9
        const auto& arg_column_data_2 =
3368
9
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3369
25
        for (size_t i = 0; i < input_rows_count; i++) {
3370
16
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3371
16
            if (decimal_places < 0 || decimal_places > 1024) {
3372
0
                return Status::InvalidArgument(
3373
0
                        "The second argument is {}, it should be in range [0, 1024].",
3374
0
                        decimal_places);
3375
0
            }
3376
16
            Int64 value = data_column->get_element(i);
3377
16
            StringRef str =
3378
16
                    FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_INT64()>(
3379
16
                            context, 0, value, 0, decimal_places);
3380
16
            result_column->insert_data(str.data, str.size);
3381
16
        }
3382
9
        return Status::OK();
3383
9
    }
_ZN5doris20FormatRoundInt64Impl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3365
4
                          size_t input_rows_count) {
3366
4
        const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get());
3367
4
        const auto& arg_column_data_2 =
3368
4
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3369
8
        for (size_t i = 0; i < input_rows_count; i++) {
3370
4
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3371
4
            if (decimal_places < 0 || decimal_places > 1024) {
3372
0
                return Status::InvalidArgument(
3373
0
                        "The second argument is {}, it should be in range [0, 1024].",
3374
0
                        decimal_places);
3375
0
            }
3376
4
            Int64 value = data_column->get_element(i);
3377
4
            StringRef str =
3378
4
                    FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_INT64()>(
3379
4
                            context, 0, value, 0, decimal_places);
3380
4
            result_column->insert_data(str.data, str.size);
3381
4
        }
3382
4
        return Status::OK();
3383
4
    }
_ZN5doris20FormatRoundInt64Impl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3365
5
                          size_t input_rows_count) {
3366
5
        const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get());
3367
5
        const auto& arg_column_data_2 =
3368
5
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3369
17
        for (size_t i = 0; i < input_rows_count; i++) {
3370
12
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3371
12
            if (decimal_places < 0 || decimal_places > 1024) {
3372
0
                return Status::InvalidArgument(
3373
0
                        "The second argument is {}, it should be in range [0, 1024].",
3374
0
                        decimal_places);
3375
0
            }
3376
12
            Int64 value = data_column->get_element(i);
3377
12
            StringRef str =
3378
12
                    FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_INT64()>(
3379
12
                            context, 0, value, 0, decimal_places);
3380
12
            result_column->insert_data(str.data, str.size);
3381
12
        }
3382
5
        return Status::OK();
3383
5
    }
3384
};
3385
3386
struct FormatRoundInt128Impl {
3387
8
    // Argument types of this overload, in call order: an Int128 value followed by
    // an Int32 (read by execute() as the number of decimal places).
    static DataTypes get_variadic_argument_types() {
        auto value_type = std::make_shared<DataTypeInt128>();
        auto decimal_places_type = std::make_shared<DataTypeInt32>();
        return {std::move(value_type), std::move(decimal_places_type)};
    }
3390
3391
    template <bool is_const>
3392
    static Status execute(FunctionContext* context, ColumnString* result_column,
3393
                          const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr,
3394
3
                          size_t input_rows_count) {
3395
3
        const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get());
3396
3
        const auto& arg_column_data_2 =
3397
3
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3398
        // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will
3399
        // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris,
3400
        // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124
3401
14
        for (size_t i = 0; i < input_rows_count; i++) {
3402
11
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3403
11
            if (decimal_places < 0 || decimal_places > 1024) {
3404
0
                return Status::InvalidArgument(
3405
0
                        "The second argument is {}, it should be in range [0, 1024].",
3406
0
                        decimal_places);
3407
0
            }
3408
11
            Int128 value = data_column->get_element(i);
3409
11
            StringRef str =
3410
11
                    FormatRound::do_format_round<Int128, FormatRound::MAX_FORMAT_LEN_INT128()>(
3411
11
                            context, 0, value, 0, decimal_places);
3412
11
            result_column->insert_data(str.data, str.size);
3413
11
        }
3414
3
        return Status::OK();
3415
3
    }
_ZN5doris21FormatRoundInt128Impl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3394
2
                          size_t input_rows_count) {
3395
2
        const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get());
3396
2
        const auto& arg_column_data_2 =
3397
2
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3398
        // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will
3399
        // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris,
3400
        // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124
3401
4
        for (size_t i = 0; i < input_rows_count; i++) {
3402
2
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3403
2
            if (decimal_places < 0 || decimal_places > 1024) {
3404
0
                return Status::InvalidArgument(
3405
0
                        "The second argument is {}, it should be in range [0, 1024].",
3406
0
                        decimal_places);
3407
0
            }
3408
2
            Int128 value = data_column->get_element(i);
3409
2
            StringRef str =
3410
2
                    FormatRound::do_format_round<Int128, FormatRound::MAX_FORMAT_LEN_INT128()>(
3411
2
                            context, 0, value, 0, decimal_places);
3412
2
            result_column->insert_data(str.data, str.size);
3413
2
        }
3414
2
        return Status::OK();
3415
2
    }
_ZN5doris21FormatRoundInt128Impl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3394
1
                          size_t input_rows_count) {
3395
1
        const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get());
3396
1
        const auto& arg_column_data_2 =
3397
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3398
        // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will
3399
        // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris,
3400
        // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124
3401
10
        for (size_t i = 0; i < input_rows_count; i++) {
3402
9
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3403
9
            if (decimal_places < 0 || decimal_places > 1024) {
3404
0
                return Status::InvalidArgument(
3405
0
                        "The second argument is {}, it should be in range [0, 1024].",
3406
0
                        decimal_places);
3407
0
            }
3408
9
            Int128 value = data_column->get_element(i);
3409
9
            StringRef str =
3410
9
                    FormatRound::do_format_round<Int128, FormatRound::MAX_FORMAT_LEN_INT128()>(
3411
9
                            context, 0, value, 0, decimal_places);
3412
9
            result_column->insert_data(str.data, str.size);
3413
9
        }
3414
1
        return Status::OK();
3415
1
    }
3416
};
3417
3418
template <PrimitiveType Type>
3419
struct FormatRoundDecimalImpl {
3420
40
    // Argument types of this overload, in call order: the data type that
    // PrimitiveTypeTraits maps `Type` to, followed by an Int32 (read by execute()
    // as the number of decimal places).
    static DataTypes get_variadic_argument_types() {
        using ValueDataType = typename PrimitiveTypeTraits<Type>::DataType;
        auto value_type = std::make_shared<ValueDataType>();
        auto decimal_places_type = std::make_shared<DataTypeInt32>();
        return {std::move(value_type), std::move(decimal_places_type)};
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE27get_variadic_argument_typesEv
Line
Count
Source
3420
8
    static DataTypes get_variadic_argument_types() {
3421
8
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3422
8
                std::make_shared<DataTypeInt32>()};
3423
8
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE27get_variadic_argument_typesEv
Line
Count
Source
3420
8
    static DataTypes get_variadic_argument_types() {
3421
8
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3422
8
                std::make_shared<DataTypeInt32>()};
3423
8
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE27get_variadic_argument_typesEv
Line
Count
Source
3420
8
    static DataTypes get_variadic_argument_types() {
3421
8
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3422
8
                std::make_shared<DataTypeInt32>()};
3423
8
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE27get_variadic_argument_typesEv
Line
Count
Source
3420
8
    static DataTypes get_variadic_argument_types() {
3421
8
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3422
8
                std::make_shared<DataTypeInt32>()};
3423
8
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE27get_variadic_argument_typesEv
Line
Count
Source
3420
8
    static DataTypes get_variadic_argument_types() {
3421
8
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3422
8
                std::make_shared<DataTypeInt32>()};
3423
8
    }
3424
3425
    template <bool is_const>
3426
    static Status execute(FunctionContext* context, ColumnString* result_column, ColumnPtr col_ptr,
3427
49
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3428
49
        const auto& arg_column_data_2 =
3429
49
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3430
49
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3431
4
            for (size_t i = 0; i < input_rows_count; i++) {
3432
3
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3433
3
                if (decimal_places < 0 || decimal_places > 1024) {
3434
0
                    return Status::InvalidArgument(
3435
0
                            "The second argument is {}, it should be in range [0, 1024].",
3436
0
                            decimal_places);
3437
0
                }
3438
3
                const auto& value = decimalv2_column->get_element(i);
3439
                // unified_frac_value has 3 digits
3440
3
                auto unified_frac_value = value.frac_value() / 1000000;
3441
3
                StringRef str =
3442
3
                        FormatRound::do_format_round<Int128,
3443
3
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3444
3
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3445
3446
3
                result_column->insert_data(str.data, str.size);
3447
3
            }
3448
48
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3449
27
            const UInt32 scale = decimal32_column->get_scale();
3450
54
            for (size_t i = 0; i < input_rows_count; i++) {
3451
27
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3452
27
                if (decimal_places < 0 || decimal_places > 1024) {
3453
0
                    return Status::InvalidArgument(
3454
0
                            "The second argument is {}, it should be in range [0, 1024].",
3455
0
                            decimal_places);
3456
0
                }
3457
27
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3458
27
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3459
27
                StringRef str =
3460
27
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3461
27
                                context, scale, static_cast<Int64>(whole_part),
3462
27
                                static_cast<Int64>(frac_part), decimal_places);
3463
3464
27
                result_column->insert_data(str.data, str.size);
3465
27
            }
3466
27
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3467
17
            const UInt32 scale = decimal64_column->get_scale();
3468
35
            for (size_t i = 0; i < input_rows_count; i++) {
3469
20
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3470
20
                if (decimal_places < 0 || decimal_places > 1024) {
3471
2
                    return Status::InvalidArgument(
3472
2
                            "The second argument is {}, it should be in range [0, 1024].",
3473
2
                            decimal_places);
3474
2
                }
3475
18
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3476
18
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3477
3478
18
                StringRef str =
3479
18
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3480
18
                                context, scale, whole_part, frac_part, decimal_places);
3481
3482
18
                result_column->insert_data(str.data, str.size);
3483
18
            }
3484
17
        } else if (const auto* decimal128_column =
3485
4
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3486
4
            const UInt32 scale = decimal128_column->get_scale();
3487
8
            for (size_t i = 0; i < input_rows_count; i++) {
3488
4
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3489
4
                if (decimal_places < 0 || decimal_places > 1024) {
3490
0
                    return Status::InvalidArgument(
3491
0
                            "The second argument is {}, it should be in range [0, 1024].",
3492
0
                            decimal_places);
3493
0
                }
3494
4
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3495
4
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3496
3497
4
                StringRef str =
3498
4
                        FormatRound::do_format_round<Int128,
3499
4
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3500
4
                                context, scale, whole_part, frac_part, decimal_places);
3501
3502
4
                result_column->insert_data(str.data, str.size);
3503
4
            }
3504
4
        } else {
3505
0
            return Status::InternalError("Not supported input argument type {}",
3506
0
                                         col_ptr->get_name());
3507
0
        }
3508
47
        return Status::OK();
3509
49
    }
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3427
1
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3428
1
        const auto& arg_column_data_2 =
3429
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3430
1
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3431
4
            for (size_t i = 0; i < input_rows_count; i++) {
3432
3
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3433
3
                if (decimal_places < 0 || decimal_places > 1024) {
3434
0
                    return Status::InvalidArgument(
3435
0
                            "The second argument is {}, it should be in range [0, 1024].",
3436
0
                            decimal_places);
3437
0
                }
3438
3
                const auto& value = decimalv2_column->get_element(i);
3439
                // unified_frac_value has 3 digits
3440
3
                auto unified_frac_value = value.frac_value() / 1000000;
3441
3
                StringRef str =
3442
3
                        FormatRound::do_format_round<Int128,
3443
3
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3444
3
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3445
3446
3
                result_column->insert_data(str.data, str.size);
3447
3
            }
3448
1
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3449
0
            const UInt32 scale = decimal32_column->get_scale();
3450
0
            for (size_t i = 0; i < input_rows_count; i++) {
3451
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3452
0
                if (decimal_places < 0 || decimal_places > 1024) {
3453
0
                    return Status::InvalidArgument(
3454
0
                            "The second argument is {}, it should be in range [0, 1024].",
3455
0
                            decimal_places);
3456
0
                }
3457
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3458
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3459
0
                StringRef str =
3460
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3461
0
                                context, scale, static_cast<Int64>(whole_part),
3462
0
                                static_cast<Int64>(frac_part), decimal_places);
3463
3464
0
                result_column->insert_data(str.data, str.size);
3465
0
            }
3466
0
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3467
0
            const UInt32 scale = decimal64_column->get_scale();
3468
0
            for (size_t i = 0; i < input_rows_count; i++) {
3469
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3470
0
                if (decimal_places < 0 || decimal_places > 1024) {
3471
0
                    return Status::InvalidArgument(
3472
0
                            "The second argument is {}, it should be in range [0, 1024].",
3473
0
                            decimal_places);
3474
0
                }
3475
0
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3476
0
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3477
3478
0
                StringRef str =
3479
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3480
0
                                context, scale, whole_part, frac_part, decimal_places);
3481
3482
0
                result_column->insert_data(str.data, str.size);
3483
0
            }
3484
0
        } else if (const auto* decimal128_column =
3485
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3486
0
            const UInt32 scale = decimal128_column->get_scale();
3487
0
            for (size_t i = 0; i < input_rows_count; i++) {
3488
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3489
0
                if (decimal_places < 0 || decimal_places > 1024) {
3490
0
                    return Status::InvalidArgument(
3491
0
                            "The second argument is {}, it should be in range [0, 1024].",
3492
0
                            decimal_places);
3493
0
                }
3494
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3495
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3496
3497
0
                StringRef str =
3498
0
                        FormatRound::do_format_round<Int128,
3499
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3500
0
                                context, scale, whole_part, frac_part, decimal_places);
3501
3502
0
                result_column->insert_data(str.data, str.size);
3503
0
            }
3504
0
        } else {
3505
0
            return Status::InternalError("Not supported input argument type {}",
3506
0
                                         col_ptr->get_name());
3507
0
        }
3508
1
        return Status::OK();
3509
1
    }
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3427
27
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3428
27
        const auto& arg_column_data_2 =
3429
27
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3430
27
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3431
0
            for (size_t i = 0; i < input_rows_count; i++) {
3432
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3433
0
                if (decimal_places < 0 || decimal_places > 1024) {
3434
0
                    return Status::InvalidArgument(
3435
0
                            "The second argument is {}, it should be in range [0, 1024].",
3436
0
                            decimal_places);
3437
0
                }
3438
0
                const auto& value = decimalv2_column->get_element(i);
3439
                // unified_frac_value has 3 digits
3440
0
                auto unified_frac_value = value.frac_value() / 1000000;
3441
0
                StringRef str =
3442
0
                        FormatRound::do_format_round<Int128,
3443
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3444
0
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3445
3446
0
                result_column->insert_data(str.data, str.size);
3447
0
            }
3448
27
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3449
27
            const UInt32 scale = decimal32_column->get_scale();
3450
54
            for (size_t i = 0; i < input_rows_count; i++) {
3451
27
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3452
27
                if (decimal_places < 0 || decimal_places > 1024) {
3453
0
                    return Status::InvalidArgument(
3454
0
                            "The second argument is {}, it should be in range [0, 1024].",
3455
0
                            decimal_places);
3456
0
                }
3457
27
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3458
27
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3459
27
                StringRef str =
3460
27
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3461
27
                                context, scale, static_cast<Int64>(whole_part),
3462
27
                                static_cast<Int64>(frac_part), decimal_places);
3463
3464
27
                result_column->insert_data(str.data, str.size);
3465
27
            }
3466
27
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3467
0
            const UInt32 scale = decimal64_column->get_scale();
3468
0
            for (size_t i = 0; i < input_rows_count; i++) {
3469
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3470
0
                if (decimal_places < 0 || decimal_places > 1024) {
3471
0
                    return Status::InvalidArgument(
3472
0
                            "The second argument is {}, it should be in range [0, 1024].",
3473
0
                            decimal_places);
3474
0
                }
3475
0
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3476
0
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3477
3478
0
                StringRef str =
3479
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3480
0
                                context, scale, whole_part, frac_part, decimal_places);
3481
3482
0
                result_column->insert_data(str.data, str.size);
3483
0
            }
3484
0
        } else if (const auto* decimal128_column =
3485
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3486
0
            const UInt32 scale = decimal128_column->get_scale();
3487
0
            for (size_t i = 0; i < input_rows_count; i++) {
3488
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3489
0
                if (decimal_places < 0 || decimal_places > 1024) {
3490
0
                    return Status::InvalidArgument(
3491
0
                            "The second argument is {}, it should be in range [0, 1024].",
3492
0
                            decimal_places);
3493
0
                }
3494
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3495
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3496
3497
0
                StringRef str =
3498
0
                        FormatRound::do_format_round<Int128,
3499
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3500
0
                                context, scale, whole_part, frac_part, decimal_places);
3501
3502
0
                result_column->insert_data(str.data, str.size);
3503
0
            }
3504
0
        } else {
3505
0
            return Status::InternalError("Not supported input argument type {}",
3506
0
                                         col_ptr->get_name());
3507
0
        }
3508
27
        return Status::OK();
3509
27
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3427
2
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3428
2
        const auto& arg_column_data_2 =
3429
2
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3430
2
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3431
0
            for (size_t i = 0; i < input_rows_count; i++) {
3432
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3433
0
                if (decimal_places < 0 || decimal_places > 1024) {
3434
0
                    return Status::InvalidArgument(
3435
0
                            "The second argument is {}, it should be in range [0, 1024].",
3436
0
                            decimal_places);
3437
0
                }
3438
0
                const auto& value = decimalv2_column->get_element(i);
3439
                // unified_frac_value has 3 digits
3440
0
                auto unified_frac_value = value.frac_value() / 1000000;
3441
0
                StringRef str =
3442
0
                        FormatRound::do_format_round<Int128,
3443
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3444
0
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3445
3446
0
                result_column->insert_data(str.data, str.size);
3447
0
            }
3448
2
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3449
0
            const UInt32 scale = decimal32_column->get_scale();
3450
0
            for (size_t i = 0; i < input_rows_count; i++) {
3451
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3452
0
                if (decimal_places < 0 || decimal_places > 1024) {
3453
0
                    return Status::InvalidArgument(
3454
0
                            "The second argument is {}, it should be in range [0, 1024].",
3455
0
                            decimal_places);
3456
0
                }
3457
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3458
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3459
0
                StringRef str =
3460
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3461
0
                                context, scale, static_cast<Int64>(whole_part),
3462
0
                                static_cast<Int64>(frac_part), decimal_places);
3463
3464
0
                result_column->insert_data(str.data, str.size);
3465
0
            }
3466
2
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3467
2
            const UInt32 scale = decimal64_column->get_scale();
3468
4
            for (size_t i = 0; i < input_rows_count; i++) {
3469
2
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3470
2
                if (decimal_places < 0 || decimal_places > 1024) {
3471
0
                    return Status::InvalidArgument(
3472
0
                            "The second argument is {}, it should be in range [0, 1024].",
3473
0
                            decimal_places);
3474
0
                }
3475
2
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3476
2
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3477
3478
2
                StringRef str =
3479
2
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3480
2
                                context, scale, whole_part, frac_part, decimal_places);
3481
3482
2
                result_column->insert_data(str.data, str.size);
3483
2
            }
3484
2
        } else if (const auto* decimal128_column =
3485
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3486
0
            const UInt32 scale = decimal128_column->get_scale();
3487
0
            for (size_t i = 0; i < input_rows_count; i++) {
3488
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3489
0
                if (decimal_places < 0 || decimal_places > 1024) {
3490
0
                    return Status::InvalidArgument(
3491
0
                            "The second argument is {}, it should be in range [0, 1024].",
3492
0
                            decimal_places);
3493
0
                }
3494
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3495
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3496
3497
0
                StringRef str =
3498
0
                        FormatRound::do_format_round<Int128,
3499
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3500
0
                                context, scale, whole_part, frac_part, decimal_places);
3501
3502
0
                result_column->insert_data(str.data, str.size);
3503
0
            }
3504
0
        } else {
3505
0
            return Status::InternalError("Not supported input argument type {}",
3506
0
                                         col_ptr->get_name());
3507
0
        }
3508
2
        return Status::OK();
3509
2
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3427
15
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3428
15
        const auto& arg_column_data_2 =
3429
15
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3430
15
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3431
0
            for (size_t i = 0; i < input_rows_count; i++) {
3432
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3433
0
                if (decimal_places < 0 || decimal_places > 1024) {
3434
0
                    return Status::InvalidArgument(
3435
0
                            "The second argument is {}, it should be in range [0, 1024].",
3436
0
                            decimal_places);
3437
0
                }
3438
0
                const auto& value = decimalv2_column->get_element(i);
3439
                // unified_frac_value has 3 digits
3440
0
                auto unified_frac_value = value.frac_value() / 1000000;
3441
0
                StringRef str =
3442
0
                        FormatRound::do_format_round<Int128,
3443
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3444
0
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3445
3446
0
                result_column->insert_data(str.data, str.size);
3447
0
            }
3448
15
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3449
0
            const UInt32 scale = decimal32_column->get_scale();
3450
0
            for (size_t i = 0; i < input_rows_count; i++) {
3451
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3452
0
                if (decimal_places < 0 || decimal_places > 1024) {
3453
0
                    return Status::InvalidArgument(
3454
0
                            "The second argument is {}, it should be in range [0, 1024].",
3455
0
                            decimal_places);
3456
0
                }
3457
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3458
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3459
0
                StringRef str =
3460
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3461
0
                                context, scale, static_cast<Int64>(whole_part),
3462
0
                                static_cast<Int64>(frac_part), decimal_places);
3463
3464
0
                result_column->insert_data(str.data, str.size);
3465
0
            }
3466
15
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3467
15
            const UInt32 scale = decimal64_column->get_scale();
3468
31
            for (size_t i = 0; i < input_rows_count; i++) {
3469
18
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3470
18
                if (decimal_places < 0 || decimal_places > 1024) {
3471
2
                    return Status::InvalidArgument(
3472
2
                            "The second argument is {}, it should be in range [0, 1024].",
3473
2
                            decimal_places);
3474
2
                }
3475
16
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3476
16
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3477
3478
16
                StringRef str =
3479
16
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3480
16
                                context, scale, whole_part, frac_part, decimal_places);
3481
3482
16
                result_column->insert_data(str.data, str.size);
3483
16
            }
3484
15
        } else if (const auto* decimal128_column =
3485
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3486
0
            const UInt32 scale = decimal128_column->get_scale();
3487
0
            for (size_t i = 0; i < input_rows_count; i++) {
3488
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3489
0
                if (decimal_places < 0 || decimal_places > 1024) {
3490
0
                    return Status::InvalidArgument(
3491
0
                            "The second argument is {}, it should be in range [0, 1024].",
3492
0
                            decimal_places);
3493
0
                }
3494
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3495
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3496
3497
0
                StringRef str =
3498
0
                        FormatRound::do_format_round<Int128,
3499
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3500
0
                                context, scale, whole_part, frac_part, decimal_places);
3501
3502
0
                result_column->insert_data(str.data, str.size);
3503
0
            }
3504
0
        } else {
3505
0
            return Status::InternalError("Not supported input argument type {}",
3506
0
                                         col_ptr->get_name());
3507
0
        }
3508
13
        return Status::OK();
3509
15
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3427
2
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3428
2
        const auto& arg_column_data_2 =
3429
2
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3430
2
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3431
0
            for (size_t i = 0; i < input_rows_count; i++) {
3432
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3433
0
                if (decimal_places < 0 || decimal_places > 1024) {
3434
0
                    return Status::InvalidArgument(
3435
0
                            "The second argument is {}, it should be in range [0, 1024].",
3436
0
                            decimal_places);
3437
0
                }
3438
0
                const auto& value = decimalv2_column->get_element(i);
3439
                // unified_frac_value has 3 digits
3440
0
                auto unified_frac_value = value.frac_value() / 1000000;
3441
0
                StringRef str =
3442
0
                        FormatRound::do_format_round<Int128,
3443
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3444
0
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3445
3446
0
                result_column->insert_data(str.data, str.size);
3447
0
            }
3448
2
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3449
0
            const UInt32 scale = decimal32_column->get_scale();
3450
0
            for (size_t i = 0; i < input_rows_count; i++) {
3451
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3452
0
                if (decimal_places < 0 || decimal_places > 1024) {
3453
0
                    return Status::InvalidArgument(
3454
0
                            "The second argument is {}, it should be in range [0, 1024].",
3455
0
                            decimal_places);
3456
0
                }
3457
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3458
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3459
0
                StringRef str =
3460
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3461
0
                                context, scale, static_cast<Int64>(whole_part),
3462
0
                                static_cast<Int64>(frac_part), decimal_places);
3463
3464
0
                result_column->insert_data(str.data, str.size);
3465
0
            }
3466
2
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3467
0
            const UInt32 scale = decimal64_column->get_scale();
3468
0
            for (size_t i = 0; i < input_rows_count; i++) {
3469
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3470
0
                if (decimal_places < 0 || decimal_places > 1024) {
3471
0
                    return Status::InvalidArgument(
3472
0
                            "The second argument is {}, it should be in range [0, 1024].",
3473
0
                            decimal_places);
3474
0
                }
3475
0
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3476
0
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3477
3478
0
                StringRef str =
3479
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3480
0
                                context, scale, whole_part, frac_part, decimal_places);
3481
3482
0
                result_column->insert_data(str.data, str.size);
3483
0
            }
3484
2
        } else if (const auto* decimal128_column =
3485
2
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3486
2
            const UInt32 scale = decimal128_column->get_scale();
3487
4
            for (size_t i = 0; i < input_rows_count; i++) {
3488
2
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3489
2
                if (decimal_places < 0 || decimal_places > 1024) {
3490
0
                    return Status::InvalidArgument(
3491
0
                            "The second argument is {}, it should be in range [0, 1024].",
3492
0
                            decimal_places);
3493
0
                }
3494
2
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3495
2
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3496
3497
2
                StringRef str =
3498
2
                        FormatRound::do_format_round<Int128,
3499
2
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3500
2
                                context, scale, whole_part, frac_part, decimal_places);
3501
3502
2
                result_column->insert_data(str.data, str.size);
3503
2
            }
3504
2
        } else {
3505
0
            return Status::InternalError("Not supported input argument type {}",
3506
0
                                         col_ptr->get_name());
3507
0
        }
3508
2
        return Status::OK();
3509
2
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3427
2
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3428
2
        const auto& arg_column_data_2 =
3429
2
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3430
2
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3431
0
            for (size_t i = 0; i < input_rows_count; i++) {
3432
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3433
0
                if (decimal_places < 0 || decimal_places > 1024) {
3434
0
                    return Status::InvalidArgument(
3435
0
                            "The second argument is {}, it should be in range [0, 1024].",
3436
0
                            decimal_places);
3437
0
                }
3438
0
                const auto& value = decimalv2_column->get_element(i);
3439
                // unified_frac_value has 3 digits
3440
0
                auto unified_frac_value = value.frac_value() / 1000000;
3441
0
                StringRef str =
3442
0
                        FormatRound::do_format_round<Int128,
3443
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3444
0
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3445
3446
0
                result_column->insert_data(str.data, str.size);
3447
0
            }
3448
2
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3449
0
            const UInt32 scale = decimal32_column->get_scale();
3450
0
            for (size_t i = 0; i < input_rows_count; i++) {
3451
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3452
0
                if (decimal_places < 0 || decimal_places > 1024) {
3453
0
                    return Status::InvalidArgument(
3454
0
                            "The second argument is {}, it should be in range [0, 1024].",
3455
0
                            decimal_places);
3456
0
                }
3457
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3458
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3459
0
                StringRef str =
3460
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3461
0
                                context, scale, static_cast<Int64>(whole_part),
3462
0
                                static_cast<Int64>(frac_part), decimal_places);
3463
3464
0
                result_column->insert_data(str.data, str.size);
3465
0
            }
3466
2
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3467
0
            const UInt32 scale = decimal64_column->get_scale();
3468
0
            for (size_t i = 0; i < input_rows_count; i++) {
3469
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3470
0
                if (decimal_places < 0 || decimal_places > 1024) {
3471
0
                    return Status::InvalidArgument(
3472
0
                            "The second argument is {}, it should be in range [0, 1024].",
3473
0
                            decimal_places);
3474
0
                }
3475
0
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3476
0
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3477
3478
0
                StringRef str =
3479
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3480
0
                                context, scale, whole_part, frac_part, decimal_places);
3481
3482
0
                result_column->insert_data(str.data, str.size);
3483
0
            }
3484
2
        } else if (const auto* decimal128_column =
3485
2
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3486
2
            const UInt32 scale = decimal128_column->get_scale();
3487
4
            for (size_t i = 0; i < input_rows_count; i++) {
3488
2
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3489
2
                if (decimal_places < 0 || decimal_places > 1024) {
3490
0
                    return Status::InvalidArgument(
3491
0
                            "The second argument is {}, it should be in range [0, 1024].",
3492
0
                            decimal_places);
3493
0
                }
3494
2
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3495
2
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3496
3497
2
                StringRef str =
3498
2
                        FormatRound::do_format_round<Int128,
3499
2
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3500
2
                                context, scale, whole_part, frac_part, decimal_places);
3501
3502
2
                result_column->insert_data(str.data, str.size);
3503
2
            }
3504
2
        } else {
3505
0
            return Status::InternalError("Not supported input argument type {}",
3506
0
                                         col_ptr->get_name());
3507
0
        }
3508
2
        return Status::OK();
3509
2
    }
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
3510
};
3511
3512
class FunctionStringLocatePos : public IFunction {
3513
public:
3514
    static constexpr auto name = "locate";
3515
832
    static FunctionPtr create() { return std::make_shared<FunctionStringLocatePos>(); }
3516
0
    String get_name() const override { return name; }
3517
0
    size_t get_number_of_arguments() const override { return 3; }
3518
3519
823
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3520
823
        return std::make_shared<DataTypeInt32>();
3521
823
    }
3522
3523
8
    DataTypes get_variadic_argument_types_impl() const override {
3524
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3525
8
                std::make_shared<DataTypeInt32>()};
3526
8
    }
3527
3528
824
    bool is_variadic() const override { return true; }
3529
3530
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3531
610
                        uint32_t result, size_t input_rows_count) const override {
3532
610
        if (arguments.size() != 3) {
3533
0
            return Status::InvalidArgument("Function {} requires 3 arguments, but got {}",
3534
0
                                           get_name(), arguments.size());
3535
0
        }
3536
610
        bool col_const[3];
3537
610
        ColumnPtr argument_columns[3];
3538
2.44k
        for (int i = 0; i < 3; ++i) {
3539
1.83k
            std::tie(argument_columns[i], col_const[i]) =
3540
1.83k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3541
1.83k
        }
3542
3543
610
        const auto* col_left = assert_cast<const ColumnString*>(argument_columns[0].get());
3544
610
        const auto* col_right = assert_cast<const ColumnString*>(argument_columns[1].get());
3545
610
        const auto* col_pos = assert_cast<const ColumnInt32*>(argument_columns[2].get());
3546
3547
610
        ColumnInt32::MutablePtr col_res = ColumnInt32::create();
3548
610
        auto& vec_res = col_res->get_data();
3549
610
        vec_res.resize(block.rows());
3550
3551
610
        const bool is_ascii = col_left->is_ascii() && col_right->is_ascii();
3552
3553
610
        if (col_const[0]) {
3554
248
            std::visit(
3555
248
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
248
                        scalar_search<is_ascii, str_const, pos_const>(
3557
248
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
248
                                input_rows_count);
3559
248
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
3555
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
22
                        scalar_search<is_ascii, str_const, pos_const>(
3557
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
22
                                input_rows_count);
3559
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
3555
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
22
                        scalar_search<is_ascii, str_const, pos_const>(
3557
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
22
                                input_rows_count);
3559
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
3555
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
22
                        scalar_search<is_ascii, str_const, pos_const>(
3557
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
22
                                input_rows_count);
3559
22
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
3555
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
60
                        scalar_search<is_ascii, str_const, pos_const>(
3557
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
60
                                input_rows_count);
3559
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
3555
62
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
62
                        scalar_search<is_ascii, str_const, pos_const>(
3557
62
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
62
                                input_rows_count);
3559
62
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
3555
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
60
                        scalar_search<is_ascii, str_const, pos_const>(
3557
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
60
                                input_rows_count);
3559
60
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
3560
248
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
3561
248
                    make_bool_variant(col_const[2]));
3562
3563
362
        } else {
3564
362
            std::visit(
3565
362
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
362
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
362
                                                                      col_pos->get_data(), vec_res,
3568
362
                                                                      input_rows_count);
3569
362
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
3565
23
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
23
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
23
                                                                      col_pos->get_data(), vec_res,
3568
23
                                                                      input_rows_count);
3569
23
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
3565
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
22
                                                                      col_pos->get_data(), vec_res,
3568
22
                                                                      input_rows_count);
3569
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
3565
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
22
                                                                      col_pos->get_data(), vec_res,
3568
22
                                                                      input_rows_count);
3569
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
Line
Count
Source
3565
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
22
                                                                      col_pos->get_data(), vec_res,
3568
22
                                                                      input_rows_count);
3569
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
3565
93
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
93
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
93
                                                                      col_pos->get_data(), vec_res,
3568
93
                                                                      input_rows_count);
3569
93
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
3565
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
60
                                                                      col_pos->get_data(), vec_res,
3568
60
                                                                      input_rows_count);
3569
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
3565
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
60
                                                                      col_pos->get_data(), vec_res,
3568
60
                                                                      input_rows_count);
3569
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
3565
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
60
                                                                      col_pos->get_data(), vec_res,
3568
60
                                                                      input_rows_count);
3569
60
                    },
3570
362
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
3571
362
                    make_bool_variant(col_const[2]));
3572
362
        }
3573
610
        block.replace_by_position(result, std::move(col_res));
3574
610
        return Status::OK();
3575
610
    }
3576
3577
private:
3578
    template <bool is_ascii, bool str_const, bool pos_const>
3579
    void scalar_search(const StringRef& ldata, const ColumnString* col_right,
3580
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
3581
248
                       size_t size) const {
3582
248
        res.resize(size);
3583
248
        StringRef substr(ldata.data, ldata.size);
3584
248
        StringSearch search {&substr};
3585
3586
499
        for (int i = 0; i < size; ++i) {
3587
251
            res[i] = locate_pos<is_ascii>(substr,
3588
251
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
251
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
251
        }
3591
248
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
22
                       size_t size) const {
3582
22
        res.resize(size);
3583
22
        StringRef substr(ldata.data, ldata.size);
3584
22
        StringSearch search {&substr};
3585
3586
44
        for (int i = 0; i < size; ++i) {
3587
22
            res[i] = locate_pos<is_ascii>(substr,
3588
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
22
        }
3591
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
22
                       size_t size) const {
3582
22
        res.resize(size);
3583
22
        StringRef substr(ldata.data, ldata.size);
3584
22
        StringSearch search {&substr};
3585
3586
44
        for (int i = 0; i < size; ++i) {
3587
22
            res[i] = locate_pos<is_ascii>(substr,
3588
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
22
        }
3591
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
22
                       size_t size) const {
3582
22
        res.resize(size);
3583
22
        StringRef substr(ldata.data, ldata.size);
3584
22
        StringSearch search {&substr};
3585
3586
44
        for (int i = 0; i < size; ++i) {
3587
22
            res[i] = locate_pos<is_ascii>(substr,
3588
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
22
        }
3591
22
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
60
                       size_t size) const {
3582
60
        res.resize(size);
3583
60
        StringRef substr(ldata.data, ldata.size);
3584
60
        StringSearch search {&substr};
3585
3586
120
        for (int i = 0; i < size; ++i) {
3587
60
            res[i] = locate_pos<is_ascii>(substr,
3588
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
60
        }
3591
60
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
62
                       size_t size) const {
3582
62
        res.resize(size);
3583
62
        StringRef substr(ldata.data, ldata.size);
3584
62
        StringSearch search {&substr};
3585
3586
127
        for (int i = 0; i < size; ++i) {
3587
65
            res[i] = locate_pos<is_ascii>(substr,
3588
65
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
65
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
65
        }
3591
62
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
60
                       size_t size) const {
3582
60
        res.resize(size);
3583
60
        StringRef substr(ldata.data, ldata.size);
3584
60
        StringSearch search {&substr};
3585
3586
120
        for (int i = 0; i < size; ++i) {
3587
60
            res[i] = locate_pos<is_ascii>(substr,
3588
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
60
        }
3591
60
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
3592
3593
    template <bool is_ascii, bool str_const, bool pos_const>
3594
    void vector_search(const ColumnString* col_left, const ColumnString* col_right,
3595
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
3596
362
                       size_t size) const {
3597
362
        res.resize(size);
3598
362
        StringSearch search;
3599
906
        for (int i = 0; i < size; ++i) {
3600
544
            StringRef substr = col_left->get_data_at(i);
3601
544
            search.set_pattern(&substr);
3602
544
            res[i] = locate_pos<is_ascii>(substr,
3603
544
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
544
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
544
        }
3606
362
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
23
                       size_t size) const {
3597
23
        res.resize(size);
3598
23
        StringSearch search;
3599
71
        for (int i = 0; i < size; ++i) {
3600
48
            StringRef substr = col_left->get_data_at(i);
3601
48
            search.set_pattern(&substr);
3602
48
            res[i] = locate_pos<is_ascii>(substr,
3603
48
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
48
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
48
        }
3606
23
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
22
                       size_t size) const {
3597
22
        res.resize(size);
3598
22
        StringSearch search;
3599
44
        for (int i = 0; i < size; ++i) {
3600
22
            StringRef substr = col_left->get_data_at(i);
3601
22
            search.set_pattern(&substr);
3602
22
            res[i] = locate_pos<is_ascii>(substr,
3603
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
22
        }
3606
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
22
                       size_t size) const {
3597
22
        res.resize(size);
3598
22
        StringSearch search;
3599
44
        for (int i = 0; i < size; ++i) {
3600
22
            StringRef substr = col_left->get_data_at(i);
3601
22
            search.set_pattern(&substr);
3602
22
            res[i] = locate_pos<is_ascii>(substr,
3603
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
22
        }
3606
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
22
                       size_t size) const {
3597
22
        res.resize(size);
3598
22
        StringSearch search;
3599
44
        for (int i = 0; i < size; ++i) {
3600
22
            StringRef substr = col_left->get_data_at(i);
3601
22
            search.set_pattern(&substr);
3602
22
            res[i] = locate_pos<is_ascii>(substr,
3603
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
22
        }
3606
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
93
                       size_t size) const {
3597
93
        res.resize(size);
3598
93
        StringSearch search;
3599
343
        for (int i = 0; i < size; ++i) {
3600
250
            StringRef substr = col_left->get_data_at(i);
3601
250
            search.set_pattern(&substr);
3602
250
            res[i] = locate_pos<is_ascii>(substr,
3603
250
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
250
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
250
        }
3606
93
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
60
                       size_t size) const {
3597
60
        res.resize(size);
3598
60
        StringSearch search;
3599
120
        for (int i = 0; i < size; ++i) {
3600
60
            StringRef substr = col_left->get_data_at(i);
3601
60
            search.set_pattern(&substr);
3602
60
            res[i] = locate_pos<is_ascii>(substr,
3603
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
60
        }
3606
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
60
                       size_t size) const {
3597
60
        res.resize(size);
3598
60
        StringSearch search;
3599
120
        for (int i = 0; i < size; ++i) {
3600
60
            StringRef substr = col_left->get_data_at(i);
3601
60
            search.set_pattern(&substr);
3602
60
            res[i] = locate_pos<is_ascii>(substr,
3603
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
60
        }
3606
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
60
                       size_t size) const {
3597
60
        res.resize(size);
3598
60
        StringSearch search;
3599
120
        for (int i = 0; i < size; ++i) {
3600
60
            StringRef substr = col_left->get_data_at(i);
3601
60
            search.set_pattern(&substr);
3602
60
            res[i] = locate_pos<is_ascii>(substr,
3603
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
60
        }
3606
60
    }
3607
3608
    template <bool is_ascii>
3609
795
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
3610
795
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
3611
            // BEHAVIOR COMPATIBLE WITH MYSQL
3612
            // locate('','')  locate('','',1) locate('','',2)
3613
            // 1  1 0
3614
11
            return 1;
3615
11
        }
3616
784
        if (is_ascii) {
3617
604
            return locate_pos_ascii(substr, str, search, start_pos);
3618
604
        } else {
3619
180
            return locate_pos_utf8(substr, str, search, start_pos);
3620
180
        }
3621
784
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb0EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
3609
180
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
3610
180
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
3611
            // BEHAVIOR COMPATIBLE WITH MYSQL
3612
            // locate('','')  locate('','',1) locate('','',2)
3613
            // 1  1 0
3614
0
            return 1;
3615
0
        }
3616
180
        if (is_ascii) {
3617
0
            return locate_pos_ascii(substr, str, search, start_pos);
3618
180
        } else {
3619
180
            return locate_pos_utf8(substr, str, search, start_pos);
3620
180
        }
3621
180
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb1EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
3609
615
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
3610
615
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
3611
            // BEHAVIOR COMPATIBLE WITH MYSQL
3612
            // locate('','')  locate('','',1) locate('','',2)
3613
            // 1  1 0
3614
11
            return 1;
3615
11
        }
3616
604
        if (is_ascii) {
3617
604
            return locate_pos_ascii(substr, str, search, start_pos);
3618
604
        } else {
3619
0
            return locate_pos_utf8(substr, str, search, start_pos);
3620
0
        }
3621
604
    }
3622
3623
    int locate_pos_utf8(StringRef substr, StringRef str, StringSearch& search,
3624
180
                        int start_pos) const {
3625
180
        std::vector<size_t> index;
3626
180
        size_t char_len = simd::VStringFunctions::get_char_len(str.data, str.size, index);
3627
180
        if (start_pos <= 0 || start_pos > char_len) {
3628
43
            return 0;
3629
43
        }
3630
137
        if (substr.size == 0) {
3631
17
            return start_pos;
3632
17
        }
3633
        // Input start_pos starts from 1.
3634
120
        StringRef adjusted_str(str.data + index[start_pos - 1], str.size - index[start_pos - 1]);
3635
120
        int32_t match_pos = search.search(&adjusted_str);
3636
120
        if (match_pos >= 0) {
3637
            // Hive returns the position in the original string starting from 1.
3638
104
            return start_pos + simd::VStringFunctions::get_char_len(adjusted_str.data, match_pos);
3639
104
        } else {
3640
16
            return 0;
3641
16
        }
3642
120
    }
3643
3644
    int locate_pos_ascii(StringRef substr, StringRef str, StringSearch& search,
3645
604
                         int start_pos) const {
3646
604
        if (start_pos <= 0 || start_pos > str.size) {
3647
404
            return 0;
3648
404
        }
3649
200
        if (substr.size == 0) {
3650
36
            return start_pos;
3651
36
        }
3652
        // Input start_pos starts from 1.
3653
164
        StringRef adjusted_str(str.data + start_pos - 1, str.size - start_pos + 1);
3654
164
        int32_t match_pos = search.search(&adjusted_str);
3655
164
        if (match_pos >= 0) {
3656
            // Hive returns the position in the original string starting from 1.
3657
50
            return start_pos + match_pos;
3658
114
        } else {
3659
114
            return 0;
3660
114
        }
3661
164
    }
3662
};
3663
3664
/// Name tag for the `replace` SQL function; FunctionReplace reads `Impl::name`.
struct ReplaceImpl {
    static constexpr const char* name = "replace";
};
3667
3668
/// Name tag for the `replace_empty` SQL function; FunctionReplace reads `Impl::name`.
struct ReplaceEmptyImpl {
    static constexpr const char* name = "replace_empty";
};
3671
3672
template <typename Impl, bool empty>
3673
class FunctionReplace : public IFunction {
3674
public:
3675
    static constexpr auto name = Impl::name;
3676
4.37k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
_ZN5doris15FunctionReplaceINS_11ReplaceImplELb1EE6createEv
Line
Count
Source
3676
2.85k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
_ZN5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE6createEv
Line
Count
Source
3676
1.52k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
3677
2
    String get_name() const override { return name; }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE8get_nameB5cxx11Ev
Line
Count
Source
3677
1
    String get_name() const override { return name; }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE8get_nameB5cxx11Ev
Line
Count
Source
3677
1
    String get_name() const override { return name; }
3678
4.36k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE23get_number_of_argumentsEv
Line
Count
Source
3678
2.84k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE23get_number_of_argumentsEv
Line
Count
Source
3678
1.51k
    size_t get_number_of_arguments() const override { return 3; }
3679
3680
4.36k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3681
4.36k
        return std::make_shared<DataTypeString>();
3682
4.36k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
3680
2.84k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3681
2.84k
        return std::make_shared<DataTypeString>();
3682
2.84k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
3680
1.51k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3681
1.51k
        return std::make_shared<DataTypeString>();
3682
1.51k
    }
3683
3684
16
    DataTypes get_variadic_argument_types_impl() const override {
3685
16
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3686
16
                std::make_shared<DataTypeString>()};
3687
16
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE32get_variadic_argument_types_implEv
Line
Count
Source
3684
8
    DataTypes get_variadic_argument_types_impl() const override {
3685
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3686
8
                std::make_shared<DataTypeString>()};
3687
8
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE32get_variadic_argument_types_implEv
Line
Count
Source
3684
8
    DataTypes get_variadic_argument_types_impl() const override {
3685
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3686
8
                std::make_shared<DataTypeString>()};
3687
8
    }
3688
3689
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3690
3.08k
                        uint32_t result, size_t input_rows_count) const override {
3691
        // We need a local variable to hold a reference to the converted column.
3692
        // So that the converted column will not be released before we use it.
3693
3.08k
        ColumnPtr col[3];
3694
3.08k
        bool col_const[3];
3695
12.3k
        for (size_t i = 0; i < 3; ++i) {
3696
9.24k
            std::tie(col[i], col_const[i]) =
3697
9.24k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3698
9.24k
        }
3699
3700
3.08k
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
3701
3.08k
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
3702
3.08k
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
3703
3704
3.08k
        ColumnString::MutablePtr col_res = ColumnString::create();
3705
3706
3.08k
        std::visit(
3707
3.08k
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
15.3k
                    for (int i = 0; i < input_rows_count; ++i) {
3709
12.2k
                        StringRef origin_str =
3710
12.2k
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
12.2k
                        StringRef old_str =
3712
12.2k
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
12.2k
                        StringRef new_str =
3714
12.2k
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
12.2k
                        std::string result =
3717
12.2k
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
12.2k
                                        new_str.to_string_view());
3719
3720
12.2k
                        col_res->insert_data(result.data(), result.length());
3721
12.2k
                    }
3722
3.08k
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
3707
142
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
533
                    for (int i = 0; i < input_rows_count; ++i) {
3709
391
                        StringRef origin_str =
3710
391
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
391
                        StringRef old_str =
3712
391
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
391
                        StringRef new_str =
3714
391
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
391
                        std::string result =
3717
391
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
391
                                        new_str.to_string_view());
3719
3720
391
                        col_res->insert_data(result.data(), result.length());
3721
391
                    }
3722
142
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
3707
1.43k
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
11.5k
                    for (int i = 0; i < input_rows_count; ++i) {
3709
10.1k
                        StringRef origin_str =
3710
10.1k
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
10.1k
                        StringRef old_str =
3712
10.1k
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
10.1k
                        StringRef new_str =
3714
10.1k
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
10.1k
                        std::string result =
3717
10.1k
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
10.1k
                                        new_str.to_string_view());
3719
3720
10.1k
                        col_res->insert_data(result.data(), result.length());
3721
10.1k
                    }
3722
1.43k
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
3707
126
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
467
                    for (int i = 0; i < input_rows_count; ++i) {
3709
341
                        StringRef origin_str =
3710
341
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
341
                        StringRef old_str =
3712
341
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
341
                        StringRef new_str =
3714
341
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
341
                        std::string result =
3717
341
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
341
                                        new_str.to_string_view());
3719
3720
341
                        col_res->insert_data(result.data(), result.length());
3721
341
                    }
3722
126
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
3723
3.08k
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3724
3.08k
                make_bool_variant(col_const[2]));
3725
3726
3.08k
        block.replace_by_position(result, std::move(col_res));
3727
3.08k
        return Status::OK();
3728
3.08k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
3690
2.20k
                        uint32_t result, size_t input_rows_count) const override {
3691
        // We need a local variable to hold a reference to the converted column.
3692
        // So that the converted column will not be released before we use it.
3693
2.20k
        ColumnPtr col[3];
3694
2.20k
        bool col_const[3];
3695
8.82k
        for (size_t i = 0; i < 3; ++i) {
3696
6.61k
            std::tie(col[i], col_const[i]) =
3697
6.61k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3698
6.61k
        }
3699
3700
2.20k
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
3701
2.20k
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
3702
2.20k
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
3703
3704
2.20k
        ColumnString::MutablePtr col_res = ColumnString::create();
3705
3706
2.20k
        std::visit(
3707
2.20k
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
2.20k
                    for (int i = 0; i < input_rows_count; ++i) {
3709
2.20k
                        StringRef origin_str =
3710
2.20k
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
2.20k
                        StringRef old_str =
3712
2.20k
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
2.20k
                        StringRef new_str =
3714
2.20k
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
2.20k
                        std::string result =
3717
2.20k
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
2.20k
                                        new_str.to_string_view());
3719
3720
2.20k
                        col_res->insert_data(result.data(), result.length());
3721
2.20k
                    }
3722
2.20k
                },
3723
2.20k
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3724
2.20k
                make_bool_variant(col_const[2]));
3725
3726
2.20k
        block.replace_by_position(result, std::move(col_res));
3727
2.20k
        return Status::OK();
3728
2.20k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
3690
876
                        uint32_t result, size_t input_rows_count) const override {
3691
        // We need a local variable to hold a reference to the converted column.
3692
        // So that the converted column will not be released before we use it.
3693
876
        ColumnPtr col[3];
3694
876
        bool col_const[3];
3695
3.50k
        for (size_t i = 0; i < 3; ++i) {
3696
2.62k
            std::tie(col[i], col_const[i]) =
3697
2.62k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3698
2.62k
        }
3699
3700
876
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
3701
876
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
3702
876
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
3703
3704
876
        ColumnString::MutablePtr col_res = ColumnString::create();
3705
3706
876
        std::visit(
3707
876
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
876
                    for (int i = 0; i < input_rows_count; ++i) {
3709
876
                        StringRef origin_str =
3710
876
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
876
                        StringRef old_str =
3712
876
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
876
                        StringRef new_str =
3714
876
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
876
                        std::string result =
3717
876
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
876
                                        new_str.to_string_view());
3719
3720
876
                        col_res->insert_data(result.data(), result.length());
3721
876
                    }
3722
876
                },
3723
876
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3724
876
                make_bool_variant(col_const[2]));
3725
3726
876
        block.replace_by_position(result, std::move(col_res));
3727
876
        return Status::OK();
3728
876
    }
3729
3730
private:
3731
12.2k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
3732
12.2k
        if (old_str.empty()) {
3733
494
            if constexpr (empty) {
3734
247
                return str;
3735
247
            } else {
3736
                // Different from "Replace" only when the search string is empty.
3737
                // it will insert `new_str` in front of every character and at the end of the old str.
3738
247
                if (new_str.empty()) {
3739
59
                    return str;
3740
59
                }
3741
188
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
3742
188
                    std::string result;
3743
188
                    ColumnString::check_chars_length(
3744
188
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
3745
188
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3746
648
                    for (char c : str) {
3747
648
                        result += new_str;
3748
648
                        result += c;
3749
648
                    }
3750
188
                    result += new_str;
3751
188
                    return result;
3752
188
                } else {
3753
0
                    std::string result;
3754
0
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3755
0
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
3756
0
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
3757
0
                        result += new_str;
3758
0
                        result.append(&str[i], utf8_char_len);
3759
0
                    }
3760
0
                    result += new_str;
3761
0
                    ColumnString::check_chars_length(result.size(), 0);
3762
0
                    return result;
3763
0
                }
3764
188
            }
3765
11.7k
        } else {
3766
11.7k
            std::string::size_type pos = 0;
3767
11.7k
            std::string::size_type oldLen = old_str.size();
3768
11.7k
            std::string::size_type newLen = new_str.size();
3769
13.6k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
3770
1.84k
                str.replace(pos, oldLen, new_str);
3771
1.84k
                pos += newLen;
3772
1.84k
            }
3773
11.7k
            return str;
3774
11.7k
        }
3775
12.2k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_
Line
Count
Source
3731
11.1k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
3732
11.1k
        if (old_str.empty()) {
3733
247
            if constexpr (empty) {
3734
247
                return str;
3735
            } else {
3736
                // Different from "Replace" only when the search string is empty.
3737
                // it will insert `new_str` in front of every character and at the end of the old str.
3738
                if (new_str.empty()) {
3739
                    return str;
3740
                }
3741
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
3742
                    std::string result;
3743
                    ColumnString::check_chars_length(
3744
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
3745
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3746
                    for (char c : str) {
3747
                        result += new_str;
3748
                        result += c;
3749
                    }
3750
                    result += new_str;
3751
                    return result;
3752
                } else {
3753
                    std::string result;
3754
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3755
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
3756
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
3757
                        result += new_str;
3758
                        result.append(&str[i], utf8_char_len);
3759
                    }
3760
                    result += new_str;
3761
                    ColumnString::check_chars_length(result.size(), 0);
3762
                    return result;
3763
                }
3764
            }
3765
10.9k
        } else {
3766
10.9k
            std::string::size_type pos = 0;
3767
10.9k
            std::string::size_type oldLen = old_str.size();
3768
10.9k
            std::string::size_type newLen = new_str.size();
3769
12.4k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
3770
1.51k
                str.replace(pos, oldLen, new_str);
3771
1.51k
                pos += newLen;
3772
1.51k
            }
3773
10.9k
            return str;
3774
10.9k
        }
3775
11.1k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_
Line
Count
Source
3731
1.09k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
3732
1.09k
        if (old_str.empty()) {
3733
            if constexpr (empty) {
3734
                return str;
3735
247
            } else {
3736
                // Different from "Replace" only when the search string is empty.
3737
                // it will insert `new_str` in front of every character and at the end of the old str.
3738
247
                if (new_str.empty()) {
3739
59
                    return str;
3740
59
                }
3741
188
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
3742
188
                    std::string result;
3743
188
                    ColumnString::check_chars_length(
3744
188
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
3745
188
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3746
648
                    for (char c : str) {
3747
648
                        result += new_str;
3748
648
                        result += c;
3749
648
                    }
3750
188
                    result += new_str;
3751
188
                    return result;
3752
188
                } else {
3753
0
                    std::string result;
3754
0
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3755
0
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
3756
0
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
3757
0
                        result += new_str;
3758
0
                        result.append(&str[i], utf8_char_len);
3759
0
                    }
3760
0
                    result += new_str;
3761
0
                    ColumnString::check_chars_length(result.size(), 0);
3762
0
                    return result;
3763
0
                }
3764
188
            }
3765
844
        } else {
3766
844
            std::string::size_type pos = 0;
3767
844
            std::string::size_type oldLen = old_str.size();
3768
844
            std::string::size_type newLen = new_str.size();
3769
1.17k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
3770
328
                str.replace(pos, oldLen, new_str);
3771
328
                pos += newLen;
3772
328
            }
3773
844
            return str;
3774
844
        }
3775
1.09k
    }
3776
};
3777
3778
struct ReverseImpl {
3779
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
3780
67
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
3781
67
        auto rows_count = offsets.size();
3782
67
        res_offsets.resize(rows_count);
3783
67
        res_data.reserve(data.size());
3784
188
        for (ssize_t i = 0; i < rows_count; ++i) {
3785
121
            auto src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
3786
121
            int64_t src_len = offsets[i] - offsets[i - 1];
3787
121
            std::string dst;
3788
121
            dst.resize(src_len);
3789
121
            simd::VStringFunctions::reverse(StringRef((uint8_t*)src_str, src_len), &dst);
3790
121
            StringOP::push_value_string(std::string_view(dst.data(), src_len), i, res_data,
3791
121
                                        res_offsets);
3792
121
        }
3793
67
        return Status::OK();
3794
67
    }
3795
};
3796
3797
template <typename Impl>
3798
class FunctionSubReplace : public IFunction {
3799
public:
3800
    static constexpr auto name = "sub_replace";
3801
3802
91
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
_ZN5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE6createEv
Line
Count
Source
3802
40
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
_ZN5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE6createEv
Line
Count
Source
3802
51
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
3803
3804
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE8get_nameB5cxx11Ev
3805
3806
73
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3807
73
        return make_nullable(std::make_shared<DataTypeString>());
3808
73
    }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
3806
31
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3807
31
        return make_nullable(std::make_shared<DataTypeString>());
3808
31
    }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
3806
42
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3807
42
        return make_nullable(std::make_shared<DataTypeString>());
3808
42
    }
3809
3810
75
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE11is_variadicEv
Line
Count
Source
3810
32
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE11is_variadicEv
Line
Count
Source
3810
43
    bool is_variadic() const override { return true; }
3811
3812
16
    DataTypes get_variadic_argument_types_impl() const override {
3813
16
        return Impl::get_variadic_argument_types();
3814
16
    }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE32get_variadic_argument_types_implEv
Line
Count
Source
3812
8
    DataTypes get_variadic_argument_types_impl() const override {
3813
8
        return Impl::get_variadic_argument_types();
3814
8
    }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE32get_variadic_argument_types_implEv
Line
Count
Source
3812
8
    DataTypes get_variadic_argument_types_impl() const override {
3813
8
        return Impl::get_variadic_argument_types();
3814
8
    }
3815
3816
0
    size_t get_number_of_arguments() const override {
3817
0
        return get_variadic_argument_types_impl().size();
3818
0
    }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE23get_number_of_argumentsEv
3819
3820
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3821
97
                        uint32_t result, size_t input_rows_count) const override {
3822
97
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
3823
97
    }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
3821
43
                        uint32_t result, size_t input_rows_count) const override {
3822
43
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
3823
43
    }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
3821
54
                        uint32_t result, size_t input_rows_count) const override {
3822
54
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
3823
54
    }
3824
};
3825
3826
struct SubReplaceImpl {
3827
    static Status replace_execute(Block& block, const ColumnNumbers& arguments, uint32_t result,
3828
98
                                  size_t input_rows_count) {
3829
98
        auto res_column = ColumnString::create();
3830
98
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
3831
98
        auto args_null_map = ColumnUInt8::create(input_rows_count, 0);
3832
98
        ColumnPtr argument_columns[4];
3833
98
        bool col_const[4];
3834
490
        for (int i = 0; i < 4; ++i) {
3835
392
            std::tie(argument_columns[i], col_const[i]) =
3836
392
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3837
392
        }
3838
98
        const auto* data_column = assert_cast<const ColumnString*>(argument_columns[0].get());
3839
98
        const auto* mask_column = assert_cast<const ColumnString*>(argument_columns[1].get());
3840
98
        const auto* start_column = assert_cast<const ColumnInt32*>(argument_columns[2].get());
3841
98
        const auto* length_column = assert_cast<const ColumnInt32*>(argument_columns[3].get());
3842
3843
98
        std::visit(
3844
98
                [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) {
3845
98
                    if (data_column->is_ascii()) {
3846
74
                        vector_ascii<origin_str_const, new_str_const, start_const, len_const>(
3847
74
                                data_column, mask_column, start_column->get_data(),
3848
74
                                length_column->get_data(), args_null_map->get_data(), result_column,
3849
74
                                input_rows_count);
3850
74
                    } else {
3851
24
                        vector_utf8<origin_str_const, new_str_const, start_const, len_const>(
3852
24
                                data_column, mask_column, start_column->get_data(),
3853
24
                                length_column->get_data(), args_null_map->get_data(), result_column,
3854
24
                                input_rows_count);
3855
24
                    }
3856
98
                },
_ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_
Line
Count
Source
3844
98
                [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) {
3845
98
                    if (data_column->is_ascii()) {
3846
74
                        vector_ascii<origin_str_const, new_str_const, start_const, len_const>(
3847
74
                                data_column, mask_column, start_column->get_data(),
3848
74
                                length_column->get_data(), args_null_map->get_data(), result_column,
3849
74
                                input_rows_count);
3850
74
                    } else {
3851
24
                        vector_utf8<origin_str_const, new_str_const, start_const, len_const>(
3852
24
                                data_column, mask_column, start_column->get_data(),
3853
24
                                length_column->get_data(), args_null_map->get_data(), result_column,
3854
24
                                input_rows_count);
3855
24
                    }
3856
98
                },
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_
3857
98
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3858
98
                make_bool_variant(col_const[2]), make_bool_variant(col_const[3]));
3859
98
        block.get_by_position(result).column =
3860
98
                ColumnNullable::create(std::move(res_column), std::move(args_null_map));
3861
98
        return Status::OK();
3862
98
    }
3863
3864
private:
3865
    template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const>
3866
    static void vector_ascii(const ColumnString* data_column, const ColumnString* mask_column,
3867
                             const PaddedPODArray<Int32>& args_start,
3868
                             const PaddedPODArray<Int32>& args_length, NullMap& args_null_map,
3869
74
                             ColumnString* result_column, size_t input_rows_count) {
3870
74
        ColumnString::Chars& res_chars = result_column->get_chars();
3871
74
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
3872
10.4k
        for (size_t row = 0; row < input_rows_count; ++row) {
3873
10.3k
            StringRef origin_str =
3874
10.3k
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
3875
10.3k
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
3876
10.3k
            const auto start = args_start[index_check_const<start_const>(row)];
3877
10.3k
            const auto length = args_length[index_check_const<len_const>(row)];
3878
10.3k
            const size_t origin_str_len = origin_str.size;
3879
            //input is null, start < 0, len < 0, str_size <= start. return NULL
3880
10.3k
            if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) {
3881
10.2k
                res_offsets.push_back(res_chars.size());
3882
10.2k
                args_null_map[row] = 1;
3883
10.2k
            } else {
3884
96
                std::string_view replace_str = new_str.to_string_view();
3885
96
                std::string result = origin_str.to_string();
3886
96
                result.replace(start, length, replace_str);
3887
96
                result_column->insert_data(result.data(), result.length());
3888
96
            }
3889
10.3k
        }
3890
74
    }
_ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Line
Count
Source
3869
74
                             ColumnString* result_column, size_t input_rows_count) {
3870
74
        ColumnString::Chars& res_chars = result_column->get_chars();
3871
74
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
3872
10.4k
        for (size_t row = 0; row < input_rows_count; ++row) {
3873
10.3k
            StringRef origin_str =
3874
10.3k
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
3875
10.3k
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
3876
10.3k
            const auto start = args_start[index_check_const<start_const>(row)];
3877
10.3k
            const auto length = args_length[index_check_const<len_const>(row)];
3878
10.3k
            const size_t origin_str_len = origin_str.size;
3879
            //input is null, start < 0, len < 0, str_size <= start. return NULL
3880
10.3k
            if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) {
3881
10.2k
                res_offsets.push_back(res_chars.size());
3882
10.2k
                args_null_map[row] = 1;
3883
10.2k
            } else {
3884
96
                std::string_view replace_str = new_str.to_string_view();
3885
96
                std::string result = origin_str.to_string();
3886
96
                result.replace(start, length, replace_str);
3887
96
                result_column->insert_data(result.data(), result.length());
3888
96
            }
3889
10.3k
        }
3890
74
    }
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
3891
3892
    template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const>
3893
    static void vector_utf8(const ColumnString* data_column, const ColumnString* mask_column,
3894
                            const PaddedPODArray<Int32>& args_start,
3895
                            const PaddedPODArray<Int32>& args_length, NullMap& args_null_map,
3896
24
                            ColumnString* result_column, size_t input_rows_count) {
3897
24
        ColumnString::Chars& res_chars = result_column->get_chars();
3898
24
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
3899
3900
48
        for (size_t row = 0; row < input_rows_count; ++row) {
3901
24
            StringRef origin_str =
3902
24
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
3903
24
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
3904
24
            const auto start = args_start[index_check_const<start_const>(row)];
3905
24
            const auto length = args_length[index_check_const<len_const>(row)];
3906
            //input is null, start < 0, len < 0 return NULL
3907
24
            if (args_null_map[row] || start < 0 || length < 0) {
3908
2
                res_offsets.push_back(res_chars.size());
3909
2
                args_null_map[row] = 1;
3910
2
                continue;
3911
2
            }
3912
3913
22
            const auto [start_byte_len, start_char_len] =
3914
22
                    simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(),
3915
22
                                                                           origin_str.end(), start);
3916
3917
            // start >= orgin.size
3918
22
            DCHECK(start_char_len <= start);
3919
22
            if (start_byte_len == origin_str.size) {
3920
8
                res_offsets.push_back(res_chars.size());
3921
8
                args_null_map[row] = 1;
3922
8
                continue;
3923
8
            }
3924
3925
14
            auto [end_byte_len, end_char_len] =
3926
14
                    simd::VStringFunctions::iterate_utf8_with_limit_length(
3927
14
                            origin_str.begin() + start_byte_len, origin_str.end(), length);
3928
14
            DCHECK(end_char_len <= length);
3929
14
            std::string_view replace_str = new_str.to_string_view();
3930
14
            std::string result = origin_str.to_string();
3931
14
            result.replace(start_byte_len, end_byte_len, replace_str);
3932
14
            result_column->insert_data(result.data(), result.length());
3933
14
        }
3934
24
    }
_ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Line
Count
Source
3896
24
                            ColumnString* result_column, size_t input_rows_count) {
3897
24
        ColumnString::Chars& res_chars = result_column->get_chars();
3898
24
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
3899
3900
48
        for (size_t row = 0; row < input_rows_count; ++row) {
3901
24
            StringRef origin_str =
3902
24
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
3903
24
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
3904
24
            const auto start = args_start[index_check_const<start_const>(row)];
3905
24
            const auto length = args_length[index_check_const<len_const>(row)];
3906
            //input is null, start < 0, len < 0 return NULL
3907
24
            if (args_null_map[row] || start < 0 || length < 0) {
3908
2
                res_offsets.push_back(res_chars.size());
3909
2
                args_null_map[row] = 1;
3910
2
                continue;
3911
2
            }
3912
3913
22
            const auto [start_byte_len, start_char_len] =
3914
22
                    simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(),
3915
22
                                                                           origin_str.end(), start);
3916
3917
            // start >= orgin.size
3918
22
            DCHECK(start_char_len <= start);
3919
22
            if (start_byte_len == origin_str.size) {
3920
8
                res_offsets.push_back(res_chars.size());
3921
8
                args_null_map[row] = 1;
3922
8
                continue;
3923
8
            }
3924
3925
14
            auto [end_byte_len, end_char_len] =
3926
14
                    simd::VStringFunctions::iterate_utf8_with_limit_length(
3927
14
                            origin_str.begin() + start_byte_len, origin_str.end(), length);
3928
            DCHECK(end_char_len <= length);
3929
14
            std::string_view replace_str = new_str.to_string_view();
3930
14
            std::string result = origin_str.to_string();
3931
14
            result.replace(start_byte_len, end_byte_len, replace_str);
3932
14
            result_column->insert_data(result.data(), result.length());
3933
14
        }
3934
24
    }
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
3935
};
3936
3937
struct SubReplaceThreeImpl {
3938
8
    static DataTypes get_variadic_argument_types() {
3939
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3940
8
                std::make_shared<DataTypeInt32>()};
3941
8
    }
3942
3943
    static Status execute_impl(FunctionContext* context, Block& block,
3944
                               const ColumnNumbers& arguments, uint32_t result,
3945
43
                               size_t input_rows_count) {
3946
43
        auto params = ColumnInt32::create(input_rows_count);
3947
43
        auto& strlen_data = params->get_data();
3948
3949
43
        auto str_col =
3950
43
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
3951
43
        if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
3952
0
            str_col = nullable->get_nested_column_ptr();
3953
0
        }
3954
43
        const auto* str_column = assert_cast<const ColumnString*>(str_col.get());
3955
        // use utf8 len
3956
120
        for (int i = 0; i < input_rows_count; ++i) {
3957
77
            StringRef str_ref = str_column->get_data_at(i);
3958
77
            strlen_data[i] = simd::VStringFunctions::get_char_len(str_ref.data, str_ref.size);
3959
77
        }
3960
3961
43
        block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"});
3962
43
        ColumnNumbers temp_arguments = {arguments[0], arguments[1], arguments[2],
3963
43
                                        block.columns() - 1};
3964
43
        return SubReplaceImpl::replace_execute(block, temp_arguments, result, input_rows_count);
3965
43
    }
3966
};
3967
3968
struct SubReplaceFourImpl {
3969
8
    static DataTypes get_variadic_argument_types() {
3970
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3971
8
                std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()};
3972
8
    }
3973
3974
    static Status execute_impl(FunctionContext* context, Block& block,
3975
                               const ColumnNumbers& arguments, uint32_t result,
3976
54
                               size_t input_rows_count) {
3977
54
        return SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count);
3978
54
    }
3979
};
3980
3981
class FunctionConvertTo : public IFunction {
3982
public:
3983
    static constexpr auto name = "convert_to";
3984
3985
15
    static FunctionPtr create() { return std::make_shared<FunctionConvertTo>(); }
3986
3987
1
    String get_name() const override { return name; }
3988
3989
6
    size_t get_number_of_arguments() const override { return 2; }
3990
3991
6
    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
3992
6
        return std::make_shared<DataTypeString>();
3993
6
    }
3994
3995
29
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
3996
29
        if (scope != FunctionContext::THREAD_LOCAL) {
3997
6
            return Status::OK();
3998
6
        }
3999
23
        if (!context->is_col_constant(1)) {
4000
0
            return Status::InvalidArgument(
4001
0
                    "character argument to convert function must be constant.");
4002
0
        }
4003
23
        const auto& character_data = context->get_constant_col(1)->column_ptr->get_data_at(0);
4004
23
        if (!iequal(character_data.to_string(), "gbk")) {
4005
0
            return Status::RuntimeError(
4006
0
                    "Illegal second argument column of function convert. now only support "
4007
0
                    "convert to character set of gbk");
4008
0
        }
4009
4010
23
        return Status::OK();
4011
23
    }
4012
4013
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4014
14
                        uint32_t result, size_t input_rows_count) const override {
4015
14
        ColumnPtr argument_column =
4016
14
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
4017
14
        const ColumnString* str_col = static_cast<const ColumnString*>(argument_column.get());
4018
14
        const auto& str_offset = str_col->get_offsets();
4019
14
        const auto& str_chars = str_col->get_chars();
4020
14
        auto col_res = ColumnString::create();
4021
14
        auto& res_offset = col_res->get_offsets();
4022
14
        auto& res_chars = col_res->get_chars();
4023
14
        res_offset.resize(input_rows_count);
4024
        // max pinyin size is 6 + 1 (first '~') for utf8 chinese word 3
4025
14
        size_t pinyin_size = (str_chars.size() + 2) / 3 * 7;
4026
14
        ColumnString::check_chars_length(pinyin_size, 0);
4027
14
        res_chars.resize(pinyin_size);
4028
4029
14
        size_t in_len = 0, out_len = 0;
4030
49
        for (int i = 0; i < input_rows_count; ++i) {
4031
35
            in_len = str_offset[i] - str_offset[i - 1];
4032
35
            const char* in = reinterpret_cast<const char*>(&str_chars[str_offset[i - 1]]);
4033
35
            char* out = reinterpret_cast<char*>(&res_chars[res_offset[i - 1]]);
4034
35
            _utf8_to_pinyin(in, in_len, out, &out_len);
4035
35
            res_offset[i] = res_offset[i - 1] + out_len;
4036
35
        }
4037
14
        res_chars.resize(res_offset[input_rows_count - 1]);
4038
14
        block.replace_by_position(result, std::move(col_res));
4039
14
        return Status::OK();
4040
14
    }
4041
4042
35
    void _utf8_to_pinyin(const char* in, size_t in_len, char* out, size_t* out_len) const {
4043
225
        auto do_memcpy = [](char*& dest, const char*& from, size_t size) {
4044
225
            memcpy_small_allow_read_write_overflow15(dest, from, size);
4045
225
            dest += size;
4046
225
            from += size;
4047
225
        };
4048
35
        auto from = in;
4049
35
        auto dest = out;
4050
4051
273
        while (from - in < in_len) {
4052
238
            auto length = get_utf8_byte_length(*from);
4053
238
            if (length != 3) {
4054
225
                do_memcpy(dest, from, length);
4055
225
            } else {
4056
                // convert utf8 to unicode code to get pinyin offset
4057
13
                if (auto tmp = (((int)(*from & 0x0F)) << 12) | (((int)(*(from + 1) & 0x3F)) << 6) |
4058
13
                               (*(from + 2) & 0x3F);
4059
13
                    tmp >= START_UNICODE_OFFSET and tmp < END_UNICODE_OFFSET) {
4060
13
                    const char* buf = nullptr;
4061
13
                    if (tmp >= START_UNICODE_OFFSET && tmp < MID_UNICODE_OFFSET) {
4062
2
                        buf = PINYIN_DICT1 + (tmp - START_UNICODE_OFFSET) * MAX_PINYIN_LEN;
4063
11
                    } else if (tmp >= MID_UNICODE_OFFSET && tmp < END_UNICODE_OFFSET) {
4064
11
                        buf = PINYIN_DICT2 + (tmp - MID_UNICODE_OFFSET) * MAX_PINYIN_LEN;
4065
11
                    }
4066
4067
13
                    auto end = strchr(buf, ' ');
4068
                    // max len for pinyin is 6
4069
13
                    int len = MAX_PINYIN_LEN;
4070
13
                    if (end != nullptr && end - buf < MAX_PINYIN_LEN) {
4071
3
                        len = end - buf;
4072
3
                    }
4073
                    // set first char '~' just make sure all english word lower than chinese word
4074
13
                    *dest = 126;
4075
13
                    memcpy(dest + 1, buf, len);
4076
13
                    dest += (len + 1);
4077
13
                    from += 3;
4078
13
                } else {
4079
0
                    do_memcpy(dest, from, 3);
4080
0
                }
4081
13
            }
4082
238
        }
4083
4084
35
        *out_len = dest - out;
4085
35
    }
4086
};
4087
4088
// refer to https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char
4089
//      UTF8
4090
// 多  0xe5, 0xa4, 0x9a  0xb6, 0xe0
4091
// 睿  0xe7, 0x9d, 0xbf  0xee, 0xa3
4092
// 丝  0xe4, 0xb8, 0x9d  0xcb, 0xbf 14989469
4093
// MySQL behaviour:
4094
// mysql> select char(0xe4, 0xb8, 0x9d using utf8);
4095
// +-----------------------------------+
4096
// | char(0xe4, 0xb8, 0x9d using utf8) |
4097
// +-----------------------------------+
4098
// | 丝                                |
4099
// +-----------------------------------+
4100
// 1 row in set, 1 warning (0.00 sec)
4101
// mysql> select char(14989469 using utf8);
4102
// +---------------------------+
4103
// | char(14989469 using utf8) |
4104
// +---------------------------+
4105
// | 丝                        |
4106
// +---------------------------+
4107
// 1 row in set, 1 warning (0.00 sec)
4108
// mysql> select char(0xe5, 0xa4, 0x9a, 0xe7, 0x9d, 0xbf, 0xe4, 0xb8, 0x9d, 68, 111, 114, 105, 115 using utf8);
4109
// +---------------------------------------------------------------------------------------------+
4110
// | char(0xe5, 0xa4, 0x9a, 0xe7, 0x9d, 0xbf, 0xe4, 0xb8, 0x9d, 68, 111, 114, 105, 115 using utf8) |
4111
// +---------------------------------------------------------------------------------------------+
4112
// | 多睿丝 Doris                                                                                 |
4113
// +---------------------------------------------------------------------------------------------+
4114
// mysql> select char(68, 111, 114, 0, 105, null, 115 using utf8);
4115
// +--------------------------------------------------+
4116
// | char(68, 111, 114, 0, 105, null, 115 using utf8) |
4117
// +--------------------------------------------------+
4118
// | Dor is                                           |
4119
// +--------------------------------------------------+
4120
4121
// return null:
4122
// mysql>  select char(255 using utf8);
4123
// +----------------------+
4124
// | char(255 using utf8) |
4125
// +----------------------+
4126
// | NULL                 |
4127
// +----------------------+
4128
// 1 row in set, 2 warnings (0.00 sec)
4129
//
4130
// mysql> show warnings;
4131
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4132
// | Level   | Code | Message                                                                                                                                                                     |
4133
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4134
// | Warning | 3719 | 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. |
4135
// | Warning | 1300 | Invalid utf8mb3 character string: 'FF'                                                                                                                                      |
4136
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4137
// 2 rows in set (0.01 sec)
4138
4139
// max int value:
4140
// mysql> select char(18446744073709551615);
4141
// +--------------------------------------------------------+
4142
// | char(18446744073709551615)                             |
4143
// +--------------------------------------------------------+
4144
// | 0xFFFFFFFF                                             |
4145
// +--------------------------------------------------------+
4146
// 1 row in set (0.00 sec)
4147
//
4148
// mysql> select char(18446744073709551616);
4149
// +--------------------------------------------------------+
4150
// | char(18446744073709551616)                             |
4151
// +--------------------------------------------------------+
4152
// | 0xFFFFFFFF                                             |
4153
// +--------------------------------------------------------+
4154
// 1 row in set, 1 warning (0.00 sec)
4155
//
4156
// mysql> show warnings;
4157
// +---------+------+-----------------------------------------------------------+
4158
// | Level   | Code | Message                                                   |
4159
// +---------+------+-----------------------------------------------------------+
4160
// | Warning | 1292 | Truncated incorrect DECIMAL value: '18446744073709551616' |
4161
// +---------+------+-----------------------------------------------------------+
4162
// 1 row in set (0.00 sec)
4163
4164
// table columns:
4165
// mysql> select * from t;
4166
// +------+------+------+
4167
// | f1   | f2   | f3   |
4168
// +------+------+------+
4169
// |  228 |  184 |  157 |
4170
// |  228 |  184 |    0 |
4171
// |  228 |  184 |   99 |
4172
// |   99 |  228 |  184 |
4173
// +------+------+------+
4174
// 4 rows in set (0.00 sec)
4175
//
4176
// mysql> select char(f1, f2, f3 using utf8) from t;
4177
// +-----------------------------+
4178
// | char(f1, f2, f3 using utf8) |
4179
// +-----------------------------+
4180
// | 丝                          |
4181
// |                             |
4182
// |                             |
4183
// | c                           |
4184
// +-----------------------------+
4185
// 4 rows in set, 4 warnings (0.00 sec)
4186
//
4187
// mysql> show warnings;
4188
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4189
// | Level   | Code | Message                                                                                                                                                                     |
4190
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4191
// | Warning | 3719 | 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. |
4192
// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B800'                                                                                                                                  |
4193
// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B863'                                                                                                                                  |
4194
// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B8'                                                                                                                                    |
4195
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4196
class FunctionIntToChar : public IFunction {
4197
public:
4198
    static constexpr auto name = "char";
4199
320
    static FunctionPtr create() { return std::make_shared<FunctionIntToChar>(); }
4200
0
    String get_name() const override { return name; }
4201
0
    size_t get_number_of_arguments() const override { return 0; }
4202
312
    bool is_variadic() const override { return true; }
4203
4204
311
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4205
311
        return make_nullable(std::make_shared<DataTypeString>());
4206
311
    }
4207
622
    bool use_default_implementation_for_nulls() const override { return false; }
4208
4209
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4210
311
                        uint32_t result, size_t input_rows_count) const override {
4211
311
        DCHECK_GE(arguments.size(), 2);
4212
4213
311
        int argument_size = arguments.size();
4214
311
        std::vector<ColumnPtr> str_columns(argument_size - 1);
4215
311
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size - 1);
4216
311
        std::vector<const ColumnString::Chars*> chars_list(argument_size - 1);
4217
4218
        // convert each argument columns to column string and then concat the string columns
4219
701
        for (size_t i = 1; i < argument_size; ++i) {
4220
390
            if (auto const_column = check_and_get_column<const ColumnConst>(
4221
390
                        *block.get_by_position(arguments[i]).column)) {
4222
                // ignore null
4223
4
                if (const_column->only_null()) {
4224
0
                    str_columns[i - 1] = nullptr;
4225
4
                } else {
4226
4
                    auto str_column = ColumnString::create();
4227
4
                    auto& chars = str_column->get_chars();
4228
4
                    auto& offsets = str_column->get_offsets();
4229
4
                    offsets.resize(1);
4230
4
                    const ColumnInt32* int_column;
4231
4
                    if (auto* nullable = check_and_get_column<const ColumnNullable>(
4232
4
                                const_column->get_data_column())) {
4233
0
                        int_column = assert_cast<const ColumnInt32*>(
4234
0
                                nullable->get_nested_column_ptr().get());
4235
4
                    } else {
4236
4
                        int_column =
4237
4
                                assert_cast<const ColumnInt32*>(&const_column->get_data_column());
4238
4
                    }
4239
4
                    int int_val = int_column->get_int(0);
4240
4
                    integer_to_char_(0, &int_val, chars, offsets);
4241
4
                    str_columns[i - 1] =
4242
4
                            ColumnConst::create(std::move(str_column), input_rows_count);
4243
4
                }
4244
4
                offsets_list[i - 1] = nullptr;
4245
4
                chars_list[i - 1] = nullptr;
4246
386
            } else {
4247
386
                auto str_column = ColumnString::create();
4248
386
                auto& chars = str_column->get_chars();
4249
386
                auto& offsets = str_column->get_offsets();
4250
                // data.resize(input_rows_count);
4251
386
                offsets.resize(input_rows_count);
4252
4253
386
                if (auto nullable = check_and_get_column<const ColumnNullable>(
4254
386
                            *block.get_by_position(arguments[i]).column)) {
4255
23
                    const auto* int_data =
4256
23
                            assert_cast<const ColumnInt32*>(nullable->get_nested_column_ptr().get())
4257
23
                                    ->get_data()
4258
23
                                    .data();
4259
23
                    const auto* null_map_data = nullable->get_null_map_data().data();
4260
148
                    for (size_t j = 0; j < input_rows_count; ++j) {
4261
                        // ignore null
4262
125
                        if (null_map_data[j]) {
4263
23
                            offsets[j] = offsets[j - 1];
4264
102
                        } else {
4265
102
                            integer_to_char_(j, int_data + j, chars, offsets);
4266
102
                        }
4267
125
                    }
4268
363
                } else {
4269
363
                    const auto* int_data = assert_cast<const ColumnInt32*>(
4270
363
                                                   block.get_by_position(arguments[i]).column.get())
4271
363
                                                   ->get_data()
4272
363
                                                   .data();
4273
770
                    for (size_t j = 0; j < input_rows_count; ++j) {
4274
407
                        integer_to_char_(j, int_data + j, chars, offsets);
4275
407
                    }
4276
363
                }
4277
386
                offsets_list[i - 1] = &str_column->get_offsets();
4278
386
                chars_list[i - 1] = &str_column->get_chars();
4279
386
                str_columns[i - 1] = std::move(str_column);
4280
386
            }
4281
390
        }
4282
4283
311
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
4284
311
        auto res = ColumnString::create();
4285
311
        auto& res_data = res->get_chars();
4286
311
        auto& res_offset = res->get_offsets();
4287
4288
311
        size_t res_reserve_size = 0;
4289
701
        for (size_t i = 0; i < argument_size - 1; ++i) {
4290
390
            if (!str_columns[i]) {
4291
0
                continue;
4292
0
            }
4293
390
            if (auto const_column = check_and_get_column<const ColumnConst>(*str_columns[i])) {
4294
4
                auto str_column =
4295
4
                        assert_cast<const ColumnString*>(&(const_column->get_data_column()));
4296
4
                auto& offsets = str_column->get_offsets();
4297
4
                res_reserve_size += (offsets[0] - offsets[-1]) * input_rows_count;
4298
386
            } else {
4299
918
                for (size_t j = 0; j < input_rows_count; ++j) {
4300
532
                    size_t append = (*offsets_list[i])[j] - (*offsets_list[i])[j - 1];
4301
                    // check whether the output might overflow(unlikely)
4302
532
                    if (UNLIKELY(UINT_MAX - append < res_reserve_size)) {
4303
0
                        return Status::BufferAllocFailed(
4304
0
                                "function char output is too large to allocate");
4305
0
                    }
4306
532
                    res_reserve_size += append;
4307
532
                }
4308
386
            }
4309
390
        }
4310
311
        if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) {
4311
0
            return Status::BufferAllocFailed("function char output is too large to allocate");
4312
0
        }
4313
311
        ColumnString::check_chars_length(res_reserve_size, 0);
4314
311
        res_data.resize(res_reserve_size);
4315
311
        res_offset.resize(input_rows_count);
4316
4317
666
        for (size_t i = 0; i < input_rows_count; ++i) {
4318
355
            int current_length = 0;
4319
915
            for (size_t j = 0; j < argument_size - 1; ++j) {
4320
560
                if (!str_columns[j]) {
4321
0
                    continue;
4322
0
                }
4323
560
                if (auto const_column = check_and_get_column<const ColumnConst>(*str_columns[j])) {
4324
28
                    auto str_column = assert_cast<const ColumnString*, TypeCheckOnRelease::DISABLE>(
4325
28
                            &(const_column->get_data_column()));
4326
28
                    auto data_item = str_column->get_data_at(0);
4327
28
                    memcpy_small_allow_read_write_overflow15(
4328
28
                            &res_data[res_offset[i - 1]] + current_length, data_item.data,
4329
28
                            data_item.size);
4330
28
                    current_length += data_item.size;
4331
532
                } else {
4332
532
                    auto& current_offsets = *offsets_list[j];
4333
532
                    auto& current_chars = *chars_list[j];
4334
4335
532
                    int size = current_offsets[i] - current_offsets[i - 1];
4336
532
                    if (size > 0) {
4337
509
                        memcpy_small_allow_read_write_overflow15(
4338
509
                                &res_data[res_offset[i - 1]] + current_length,
4339
509
                                &current_chars[current_offsets[i - 1]], size);
4340
509
                        current_length += size;
4341
509
                    }
4342
532
                }
4343
560
            }
4344
355
            res_offset[i] = res_offset[i - 1] + current_length;
4345
355
        }
4346
4347
        // validate utf8
4348
311
        auto* null_map_data = null_map->get_data().data();
4349
666
        for (size_t i = 0; i < input_rows_count; ++i) {
4350
355
            if (!validate_utf8((const char*)(&res_data[res_offset[i - 1]]),
4351
355
                               res_offset[i] - res_offset[i - 1])) {
4352
136
                null_map_data[i] = 1;
4353
136
            }
4354
355
        }
4355
4356
311
        block.get_by_position(result).column =
4357
311
                ColumnNullable::create(std::move(res), std::move(null_map));
4358
311
        return Status::OK();
4359
311
    }
4360
4361
private:
4362
    void integer_to_char_(int line_num, const int* num, ColumnString::Chars& chars,
4363
513
                          IColumn::Offsets& offsets) const {
4364
513
        if (0 == *num) {
4365
26
            chars.push_back('\0');
4366
26
            offsets[line_num] = offsets[line_num - 1] + 1;
4367
26
            return;
4368
26
        }
4369
487
        const char* bytes = (const char*)(num);
4370
487
        if constexpr (std::endian::native == std::endian::little) {
4371
487
            int k = 3;
4372
1.87k
            for (; k >= 0; --k) {
4373
1.87k
                if (bytes[k]) {
4374
487
                    break;
4375
487
                }
4376
1.87k
            }
4377
487
            offsets[line_num] = offsets[line_num - 1] + k + 1;
4378
1.05k
            for (; k >= 0; --k) {
4379
565
                chars.push_back(bytes[k] ? bytes[k] : '\0');
4380
565
            }
4381
        } else if constexpr (std::endian::native == std::endian::big) {
4382
            int k = 0;
4383
            for (; k < 4; ++k) {
4384
                if (bytes[k]) {
4385
                    break;
4386
                }
4387
            }
4388
            offsets[line_num] = offsets[line_num - 1] + 4 - k;
4389
            for (; k < 4; ++k) {
4390
                chars.push_back(bytes[k] ? bytes[k] : '\0');
4391
            }
4392
        } else {
4393
            static_assert(std::endian::native == std::endian::big ||
4394
                                  std::endian::native == std::endian::little,
4395
                          "Unsupported endianness");
4396
        }
4397
487
    }
4398
};
4399
4400
class FunctionOverlay : public IFunction {
4401
public:
4402
    static constexpr auto name = "overlay";
4403
31
    static FunctionPtr create() { return std::make_shared<FunctionOverlay>(); }
4404
1
    String get_name() const override { return name; }
4405
22
    size_t get_number_of_arguments() const override { return 4; }
4406
4407
22
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4408
22
        return std::make_shared<DataTypeString>();
4409
22
    }
4410
4411
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4412
18
                        uint32_t result, size_t input_rows_count) const override {
4413
18
        DCHECK_EQ(arguments.size(), 4);
4414
4415
18
        bool col_const[4];
4416
18
        ColumnPtr argument_columns[4];
4417
90
        for (int i = 0; i < 4; ++i) {
4418
72
            std::tie(argument_columns[i], col_const[i]) =
4419
72
                    unpack_if_const(block.get_by_position(arguments[i]).column);
4420
72
        }
4421
4422
18
        const auto* col_origin = assert_cast<const ColumnString*>(argument_columns[0].get());
4423
4424
18
        const auto* col_pos =
4425
18
                assert_cast<const ColumnInt32*>(argument_columns[1].get())->get_data().data();
4426
18
        const auto* col_len =
4427
18
                assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_data().data();
4428
18
        const auto* col_insert = assert_cast<const ColumnString*>(argument_columns[3].get());
4429
4430
18
        ColumnString::MutablePtr col_res = ColumnString::create();
4431
4432
        // if all input string is ascii, we can use ascii function to handle it
4433
18
        const bool is_all_ascii = col_origin->is_ascii() && col_insert->is_ascii();
4434
18
        std::visit(
4435
18
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4436
18
                    if (is_all_ascii) {
4437
8
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4438
8
                                col_origin, col_pos, col_len, col_insert, col_res,
4439
8
                                input_rows_count);
4440
10
                    } else {
4441
10
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4442
10
                                col_origin, col_pos, col_len, col_insert, col_res,
4443
10
                                input_rows_count);
4444
10
                    }
4445
18
                },
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SH_EEDaSA_SB_SC_SD_
Line
Count
Source
4435
3
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4436
3
                    if (is_all_ascii) {
4437
1
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4438
1
                                col_origin, col_pos, col_len, col_insert, col_res,
4439
1
                                input_rows_count);
4440
2
                    } else {
4441
2
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4442
2
                                col_origin, col_pos, col_len, col_insert, col_res,
4443
2
                                input_rows_count);
4444
2
                    }
4445
3
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SG_IbLb1EEEEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESH_EEDaSA_SB_SC_SD_
Line
Count
Source
4435
1
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4436
1
                    if (is_all_ascii) {
4437
0
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4438
0
                                col_origin, col_pos, col_len, col_insert, col_res,
4439
0
                                input_rows_count);
4440
1
                    } else {
4441
1
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4442
1
                                col_origin, col_pos, col_len, col_insert, col_res,
4443
1
                                input_rows_count);
4444
1
                    }
4445
1
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SH_EEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SI_EEDaSA_SB_SC_SD_
Line
Count
Source
4435
1
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4436
1
                    if (is_all_ascii) {
4437
0
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4438
0
                                col_origin, col_pos, col_len, col_insert, col_res,
4439
0
                                input_rows_count);
4440
1
                    } else {
4441
1
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4442
1
                                col_origin, col_pos, col_len, col_insert, col_res,
4443
1
                                input_rows_count);
4444
1
                    }
4445
1
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SH_EEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SI_EEDaSA_SB_SC_SD_
Line
Count
Source
4435
12
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4436
12
                    if (is_all_ascii) {
4437
6
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4438
6
                                col_origin, col_pos, col_len, col_insert, col_res,
4439
6
                                input_rows_count);
4440
6
                    } else {
4441
6
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4442
6
                                col_origin, col_pos, col_len, col_insert, col_res,
4443
6
                                input_rows_count);
4444
6
                    }
4445
12
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SI_EEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SH_EEDaSA_SB_SC_SD_
Line
Count
Source
4435
1
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4436
1
                    if (is_all_ascii) {
4437
1
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4438
1
                                col_origin, col_pos, col_len, col_insert, col_res,
4439
1
                                input_rows_count);
4440
1
                    } else {
4441
0
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4442
0
                                col_origin, col_pos, col_len, col_insert, col_res,
4443
0
                                input_rows_count);
4444
0
                    }
4445
1
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SG_IbLb0EEEEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SH_EEDaSA_SB_SC_SD_
4446
18
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
4447
18
                make_bool_variant(col_const[2]), make_bool_variant(col_const[3]));
4448
18
        block.replace_by_position(result, std::move(col_res));
4449
18
        return Status::OK();
4450
18
    }
4451
4452
private:
4453
    template <bool origin_const, bool pos_const, bool len_const, bool insert_const>
4454
    static void vector_ascii(const ColumnString* col_origin, int const* col_pos, int const* col_len,
4455
                             const ColumnString* col_insert, ColumnString::MutablePtr& col_res,
4456
8
                             size_t input_rows_count) {
4457
8
        auto& col_res_chars = col_res->get_chars();
4458
8
        auto& col_res_offsets = col_res->get_offsets();
4459
8
        StringRef origin_str, insert_str;
4460
45
        for (size_t i = 0; i < input_rows_count; i++) {
4461
37
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4462
            // pos is 1-based index,so we need to minus 1
4463
37
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4464
37
            const auto len = col_len[index_check_const<len_const>(i)];
4465
37
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4466
37
            const auto origin_size = origin_str.size;
4467
37
            if (pos >= origin_size || pos < 0) {
4468
                // If pos is not within the length of the string, the original string is returned.
4469
8
                col_res->insert_data(origin_str.data, origin_str.size);
4470
8
                continue;
4471
8
            }
4472
29
            col_res_chars.insert(origin_str.data,
4473
29
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
4474
29
            if (pos + len > origin_size || len < 0) {
4475
5
                col_res_chars.insert(insert_str.begin(),
4476
5
                                     insert_str.end()); // copy all of insert_str.
4477
24
            } else {
4478
24
                col_res_chars.insert(insert_str.begin(),
4479
24
                                     insert_str.end()); // copy all of insert_str.
4480
24
                col_res_chars.insert(
4481
24
                        origin_str.data + pos + len,
4482
24
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4483
24
            }
4484
29
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4485
29
            col_res_offsets.push_back(col_res_chars.size());
4486
29
        }
4487
8
    }
_ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4456
1
                             size_t input_rows_count) {
4457
1
        auto& col_res_chars = col_res->get_chars();
4458
1
        auto& col_res_offsets = col_res->get_offsets();
4459
1
        StringRef origin_str, insert_str;
4460
2
        for (size_t i = 0; i < input_rows_count; i++) {
4461
1
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4462
            // pos is 1-based index,so we need to minus 1
4463
1
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4464
1
            const auto len = col_len[index_check_const<len_const>(i)];
4465
1
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4466
1
            const auto origin_size = origin_str.size;
4467
1
            if (pos >= origin_size || pos < 0) {
4468
                // If pos is not within the length of the string, the original string is returned.
4469
0
                col_res->insert_data(origin_str.data, origin_str.size);
4470
0
                continue;
4471
0
            }
4472
1
            col_res_chars.insert(origin_str.data,
4473
1
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
4474
1
            if (pos + len > origin_size || len < 0) {
4475
1
                col_res_chars.insert(insert_str.begin(),
4476
1
                                     insert_str.end()); // copy all of insert_str.
4477
1
            } else {
4478
0
                col_res_chars.insert(insert_str.begin(),
4479
0
                                     insert_str.end()); // copy all of insert_str.
4480
0
                col_res_chars.insert(
4481
0
                        origin_str.data + pos + len,
4482
0
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4483
0
            }
4484
1
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4485
1
            col_res_offsets.push_back(col_res_chars.size());
4486
1
        }
4487
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4456
6
                             size_t input_rows_count) {
4457
6
        auto& col_res_chars = col_res->get_chars();
4458
6
        auto& col_res_offsets = col_res->get_offsets();
4459
6
        StringRef origin_str, insert_str;
4460
12
        for (size_t i = 0; i < input_rows_count; i++) {
4461
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4462
            // pos is 1-based index,so we need to minus 1
4463
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4464
6
            const auto len = col_len[index_check_const<len_const>(i)];
4465
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4466
6
            const auto origin_size = origin_str.size;
4467
6
            if (pos >= origin_size || pos < 0) {
4468
                // If pos is not within the length of the string, the original string is returned.
4469
3
                col_res->insert_data(origin_str.data, origin_str.size);
4470
3
                continue;
4471
3
            }
4472
3
            col_res_chars.insert(origin_str.data,
4473
3
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
4474
3
            if (pos + len > origin_size || len < 0) {
4475
1
                col_res_chars.insert(insert_str.begin(),
4476
1
                                     insert_str.end()); // copy all of insert_str.
4477
2
            } else {
4478
2
                col_res_chars.insert(insert_str.begin(),
4479
2
                                     insert_str.end()); // copy all of insert_str.
4480
2
                col_res_chars.insert(
4481
2
                        origin_str.data + pos + len,
4482
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4483
2
            }
4484
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4485
3
            col_res_offsets.push_back(col_res_chars.size());
4486
3
        }
4487
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4456
1
                             size_t input_rows_count) {
4457
1
        auto& col_res_chars = col_res->get_chars();
4458
1
        auto& col_res_offsets = col_res->get_offsets();
4459
1
        StringRef origin_str, insert_str;
4460
31
        for (size_t i = 0; i < input_rows_count; i++) {
4461
30
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4462
            // pos is 1-based index,so we need to minus 1
4463
30
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4464
30
            const auto len = col_len[index_check_const<len_const>(i)];
4465
30
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4466
30
            const auto origin_size = origin_str.size;
4467
30
            if (pos >= origin_size || pos < 0) {
4468
                // If pos is not within the length of the string, the original string is returned.
4469
5
                col_res->insert_data(origin_str.data, origin_str.size);
4470
5
                continue;
4471
5
            }
4472
25
            col_res_chars.insert(origin_str.data,
4473
25
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
4474
25
            if (pos + len > origin_size || len < 0) {
4475
3
                col_res_chars.insert(insert_str.begin(),
4476
3
                                     insert_str.end()); // copy all of insert_str.
4477
22
            } else {
4478
22
                col_res_chars.insert(insert_str.begin(),
4479
22
                                     insert_str.end()); // copy all of insert_str.
4480
22
                col_res_chars.insert(
4481
22
                        origin_str.data + pos + len,
4482
22
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4483
22
            }
4484
25
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4485
25
            col_res_offsets.push_back(col_res_chars.size());
4486
25
        }
4487
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
4488
4489
    template <bool origin_const, bool pos_const, bool len_const, bool insert_const>
4490
    NO_SANITIZE_UNDEFINED static void vector_utf8(const ColumnString* col_origin,
4491
                                                  int const* col_pos, int const* col_len,
4492
                                                  const ColumnString* col_insert,
4493
                                                  ColumnString::MutablePtr& col_res,
4494
10
                                                  size_t input_rows_count) {
4495
10
        auto& col_res_chars = col_res->get_chars();
4496
10
        auto& col_res_offsets = col_res->get_offsets();
4497
10
        StringRef origin_str, insert_str;
4498
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
4499
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
4500
10
        std::vector<size_t> utf8_origin_offsets;
4501
122
        for (size_t i = 0; i < input_rows_count; i++) {
4502
112
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4503
            // pos is 1-based index,so we need to minus 1
4504
112
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4505
112
            const auto len = col_len[index_check_const<len_const>(i)];
4506
112
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4507
112
            utf8_origin_offsets.clear();
4508
4509
787
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
4510
675
                utf8_origin_offsets.push_back(ni);
4511
675
                char_size = get_utf8_byte_length(origin_str.data[ni]);
4512
675
            }
4513
4514
112
            const size_t utf8_origin_size = utf8_origin_offsets.size();
4515
4516
112
            if (pos >= utf8_origin_size || pos < 0) {
4517
                // If pos is not within the length of the string, the original string is returned.
4518
35
                col_res->insert_data(origin_str.data, origin_str.size);
4519
35
                continue;
4520
35
            }
4521
77
            col_res_chars.insert(
4522
77
                    origin_str.data,
4523
77
                    origin_str.data +
4524
77
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
4525
77
            if (pos + len >= utf8_origin_size || len < 0) {
4526
24
                col_res_chars.insert(insert_str.begin(),
4527
24
                                     insert_str.end()); // copy all of insert_str.
4528
53
            } else {
4529
53
                col_res_chars.insert(insert_str.begin(),
4530
53
                                     insert_str.end()); // copy all of insert_str.
4531
53
                col_res_chars.insert(
4532
53
                        origin_str.data + utf8_origin_offsets[pos + len],
4533
53
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4534
53
            }
4535
77
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4536
77
            col_res_offsets.push_back(col_res_chars.size());
4537
77
        }
4538
10
    }
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4494
2
                                                  size_t input_rows_count) {
4495
2
        auto& col_res_chars = col_res->get_chars();
4496
2
        auto& col_res_offsets = col_res->get_offsets();
4497
2
        StringRef origin_str, insert_str;
4498
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
4499
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
4500
2
        std::vector<size_t> utf8_origin_offsets;
4501
48
        for (size_t i = 0; i < input_rows_count; i++) {
4502
46
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4503
            // pos is 1-based index,so we need to minus 1
4504
46
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4505
46
            const auto len = col_len[index_check_const<len_const>(i)];
4506
46
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4507
46
            utf8_origin_offsets.clear();
4508
4509
325
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
4510
279
                utf8_origin_offsets.push_back(ni);
4511
279
                char_size = get_utf8_byte_length(origin_str.data[ni]);
4512
279
            }
4513
4514
46
            const size_t utf8_origin_size = utf8_origin_offsets.size();
4515
4516
46
            if (pos >= utf8_origin_size || pos < 0) {
4517
                // If pos is not within the length of the string, the original string is returned.
4518
19
                col_res->insert_data(origin_str.data, origin_str.size);
4519
19
                continue;
4520
19
            }
4521
27
            col_res_chars.insert(
4522
27
                    origin_str.data,
4523
27
                    origin_str.data +
4524
27
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
4525
27
            if (pos + len >= utf8_origin_size || len < 0) {
4526
12
                col_res_chars.insert(insert_str.begin(),
4527
12
                                     insert_str.end()); // copy all of insert_str.
4528
15
            } else {
4529
15
                col_res_chars.insert(insert_str.begin(),
4530
15
                                     insert_str.end()); // copy all of insert_str.
4531
15
                col_res_chars.insert(
4532
15
                        origin_str.data + utf8_origin_offsets[pos + len],
4533
15
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4534
15
            }
4535
27
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4536
27
            col_res_offsets.push_back(col_res_chars.size());
4537
27
        }
4538
2
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4494
1
                                                  size_t input_rows_count) {
4495
1
        auto& col_res_chars = col_res->get_chars();
4496
1
        auto& col_res_offsets = col_res->get_offsets();
4497
1
        StringRef origin_str, insert_str;
4498
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
4499
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
4500
1
        std::vector<size_t> utf8_origin_offsets;
4501
31
        for (size_t i = 0; i < input_rows_count; i++) {
4502
30
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4503
            // pos is 1-based index,so we need to minus 1
4504
30
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4505
30
            const auto len = col_len[index_check_const<len_const>(i)];
4506
30
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4507
30
            utf8_origin_offsets.clear();
4508
4509
209
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
4510
179
                utf8_origin_offsets.push_back(ni);
4511
179
                char_size = get_utf8_byte_length(origin_str.data[ni]);
4512
179
            }
4513
4514
30
            const size_t utf8_origin_size = utf8_origin_offsets.size();
4515
4516
30
            if (pos >= utf8_origin_size || pos < 0) {
4517
                // If pos is not within the length of the string, the original string is returned.
4518
9
                col_res->insert_data(origin_str.data, origin_str.size);
4519
9
                continue;
4520
9
            }
4521
21
            col_res_chars.insert(
4522
21
                    origin_str.data,
4523
21
                    origin_str.data +
4524
21
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
4525
21
            if (pos + len >= utf8_origin_size || len < 0) {
4526
3
                col_res_chars.insert(insert_str.begin(),
4527
3
                                     insert_str.end()); // copy all of insert_str.
4528
18
            } else {
4529
18
                col_res_chars.insert(insert_str.begin(),
4530
18
                                     insert_str.end()); // copy all of insert_str.
4531
18
                col_res_chars.insert(
4532
18
                        origin_str.data + utf8_origin_offsets[pos + len],
4533
18
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4534
18
            }
4535
21
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4536
21
            col_res_offsets.push_back(col_res_chars.size());
4537
21
        }
4538
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4494
1
                                                  size_t input_rows_count) {
4495
1
        auto& col_res_chars = col_res->get_chars();
4496
1
        auto& col_res_offsets = col_res->get_offsets();
4497
1
        StringRef origin_str, insert_str;
4498
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
4499
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
4500
1
        std::vector<size_t> utf8_origin_offsets;
4501
31
        for (size_t i = 0; i < input_rows_count; i++) {
4502
30
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4503
            // pos is 1-based index,so we need to minus 1
4504
30
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4505
30
            const auto len = col_len[index_check_const<len_const>(i)];
4506
30
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4507
30
            utf8_origin_offsets.clear();
4508
4509
209
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
4510
179
                utf8_origin_offsets.push_back(ni);
4511
179
                char_size = get_utf8_byte_length(origin_str.data[ni]);
4512
179
            }
4513
4514
30
            const size_t utf8_origin_size = utf8_origin_offsets.size();
4515
4516
30
            if (pos >= utf8_origin_size || pos < 0) {
4517
                // If pos is not within the length of the string, the original string is returned.
4518
4
                col_res->insert_data(origin_str.data, origin_str.size);
4519
4
                continue;
4520
4
            }
4521
26
            col_res_chars.insert(
4522
26
                    origin_str.data,
4523
26
                    origin_str.data +
4524
26
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
4525
26
            if (pos + len >= utf8_origin_size || len < 0) {
4526
8
                col_res_chars.insert(insert_str.begin(),
4527
8
                                     insert_str.end()); // copy all of insert_str.
4528
18
            } else {
4529
18
                col_res_chars.insert(insert_str.begin(),
4530
18
                                     insert_str.end()); // copy all of insert_str.
4531
18
                col_res_chars.insert(
4532
18
                        origin_str.data + utf8_origin_offsets[pos + len],
4533
18
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4534
18
            }
4535
26
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4536
26
            col_res_offsets.push_back(col_res_chars.size());
4537
26
        }
4538
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4494
6
                                                  size_t input_rows_count) {
4495
6
        auto& col_res_chars = col_res->get_chars();
4496
6
        auto& col_res_offsets = col_res->get_offsets();
4497
6
        StringRef origin_str, insert_str;
4498
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
4499
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
4500
6
        std::vector<size_t> utf8_origin_offsets;
4501
12
        for (size_t i = 0; i < input_rows_count; i++) {
4502
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4503
            // pos is a 1-based index, so subtract 1 to get the 0-based character index
4504
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4505
6
            const auto len = col_len[index_check_const<len_const>(i)];
4506
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4507
6
            utf8_origin_offsets.clear();
4508
4509
44
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
4510
38
                utf8_origin_offsets.push_back(ni);
4511
38
                char_size = get_utf8_byte_length(origin_str.data[ni]);
4512
38
            }
4513
4514
6
            const size_t utf8_origin_size = utf8_origin_offsets.size();
4515
4516
6
            if (pos >= utf8_origin_size || pos < 0) {
4517
                // If pos is not within the length of the string, the original string is returned.
4518
3
                col_res->insert_data(origin_str.data, origin_str.size);
4519
3
                continue;
4520
3
            }
4521
3
            col_res_chars.insert(
4522
3
                    origin_str.data,
4523
3
                    origin_str.data +
4524
3
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
4525
3
            if (pos + len >= utf8_origin_size || len < 0) {
4526
1
                col_res_chars.insert(insert_str.begin(),
4527
1
                                     insert_str.end()); // copy all of insert_str.
4528
2
            } else {
4529
2
                col_res_chars.insert(insert_str.begin(),
4530
2
                                     insert_str.end()); // copy all of insert_str.
4531
2
                col_res_chars.insert(
4532
2
                        origin_str.data + utf8_origin_offsets[pos + len],
4533
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4534
2
            }
4535
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4536
3
            col_res_offsets.push_back(col_res_chars.size());
4537
3
        }
4538
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
4539
};
4540
4541
class FunctionNgramSearch : public IFunction {
4542
public:
4543
    static constexpr auto name = "ngram_search";
4544
24
    static FunctionPtr create() { return std::make_shared<FunctionNgramSearch>(); }
4545
1
    String get_name() const override { return name; }
4546
15
    size_t get_number_of_arguments() const override { return 3; }
4547
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4548
15
        return std::make_shared<DataTypeFloat64>();
4549
15
    }
4550
4551
    // ngram_search(text,pattern,gram_num)
4552
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4553
14
                        uint32_t result, size_t input_rows_count) const override {
4554
14
        CHECK_EQ(arguments.size(), 3);
4555
14
        auto col_res = ColumnFloat64::create();
4556
14
        bool col_const[3];
4557
14
        ColumnPtr argument_columns[3];
4558
56
        for (int i = 0; i < 3; ++i) {
4559
42
            std::tie(argument_columns[i], col_const[i]) =
4560
42
                    unpack_if_const(block.get_by_position(arguments[i]).column);
4561
42
        }
4562
        // There is no need to check if the 2-th,3-th parameters are const here because fe has already checked them.
4563
14
        auto pattern = assert_cast<const ColumnString*>(argument_columns[1].get())->get_data_at(0);
4564
14
        auto gram_num = assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_element(0);
4565
14
        const auto* text_col = assert_cast<const ColumnString*>(argument_columns[0].get());
4566
4567
14
        if (col_const[0]) {
4568
0
            _execute_impl<true>(text_col, pattern, gram_num, *col_res, input_rows_count);
4569
14
        } else {
4570
14
            _execute_impl<false>(text_col, pattern, gram_num, *col_res, input_rows_count);
4571
14
        }
4572
4573
14
        block.replace_by_position(result, std::move(col_res));
4574
14
        return Status::OK();
4575
14
    }
4576
4577
private:
4578
    using NgramMap = phmap::flat_hash_map<uint32_t, uint8_t>;
4579
    // In the map, the key is the CRC32 hash result of a substring in the string,
4580
    // and the value indicates whether this hash is found in the text or pattern.
4581
    constexpr static auto not_found = 0b00;
4582
    constexpr static auto found_in_pattern = 0b01;
4583
    constexpr static auto found_in_text = 0b10;
4584
    constexpr static auto found_in_pattern_and_text = 0b11;
4585
4586
173
    uint32_t sub_str_hash(const char* data, int32_t length) const {
4587
173
        constexpr static uint32_t seed = 0;
4588
173
        return crc32c::Extend(seed, (const uint8_t*)data, length);
4589
173
    }
4590
4591
    template <bool column_const>
4592
    void _execute_impl(const ColumnString* text_col, StringRef& pattern, int gram_num,
4593
14
                       ColumnFloat64& res, size_t size) const {
4594
14
        auto& res_data = res.get_data();
4595
14
        res_data.resize_fill(size, 0);
4596
        // If the length of the pattern is less than gram_num, return 0.
4597
14
        if (pattern.size < gram_num) {
4598
0
            return;
4599
0
        }
4600
4601
        // Build a map by pattern string, which will be used repeatedly in the following loop.
4602
14
        NgramMap pattern_map;
4603
14
        int pattern_count = get_pattern_set(pattern_map, pattern, gram_num);
4604
        // Each time a loop is executed, the map will be modified, so it needs to be restored afterward.
4605
14
        std::vector<uint32_t> restore_map;
4606
4607
35
        for (int i = 0; i < size; i++) {
4608
21
            auto text = text_col->get_data_at(index_check_const<column_const>(i));
4609
21
            if (text.size < gram_num) {
4610
                // If the length of the text is less than gram_num, return 0.
4611
4
                continue;
4612
4
            }
4613
17
            restore_map.reserve(text.size);
4614
17
            auto [text_count, intersection_count] =
4615
17
                    get_text_set(text, gram_num, pattern_map, restore_map);
4616
4617
            // 2 * |Intersection| / (|text substr set| + |pattern substr set|)
4618
17
            res_data[i] = 2.0 * intersection_count / (text_count + pattern_count);
4619
17
        }
4620
14
    }
Unexecuted instantiation: _ZNK5doris19FunctionNgramSearch13_execute_implILb1EEEvPKNS_9ColumnStrIjEERNS_9StringRefEiRNS_12ColumnVectorILNS_13PrimitiveTypeE9EEEm
_ZNK5doris19FunctionNgramSearch13_execute_implILb0EEEvPKNS_9ColumnStrIjEERNS_9StringRefEiRNS_12ColumnVectorILNS_13PrimitiveTypeE9EEEm
Line
Count
Source
4593
14
                       ColumnFloat64& res, size_t size) const {
4594
14
        auto& res_data = res.get_data();
4595
14
        res_data.resize_fill(size, 0);
4596
        // If the length of the pattern is less than gram_num, return 0.
4597
14
        if (pattern.size < gram_num) {
4598
0
            return;
4599
0
        }
4600
4601
        // Build a map by pattern string, which will be used repeatedly in the following loop.
4602
14
        NgramMap pattern_map;
4603
14
        int pattern_count = get_pattern_set(pattern_map, pattern, gram_num);
4604
        // Each time a loop is executed, the map will be modified, so it needs to be restored afterward.
4605
14
        std::vector<uint32_t> restore_map;
4606
4607
35
        for (int i = 0; i < size; i++) {
4608
21
            auto text = text_col->get_data_at(index_check_const<column_const>(i));
4609
21
            if (text.size < gram_num) {
4610
                // If the length of the text is less than gram_num, return 0.
4611
4
                continue;
4612
4
            }
4613
17
            restore_map.reserve(text.size);
4614
17
            auto [text_count, intersection_count] =
4615
17
                    get_text_set(text, gram_num, pattern_map, restore_map);
4616
4617
            // 2 * |Intersection| / (|text substr set| + |pattern substr set|)
4618
17
            res_data[i] = 2.0 * intersection_count / (text_count + pattern_count);
4619
17
        }
4620
14
    }
4621
4622
14
    size_t get_pattern_set(NgramMap& pattern_map, StringRef& pattern, int gram_num) const {
4623
14
        size_t pattern_count = 0;
4624
87
        for (int i = 0; i + gram_num <= pattern.size; i++) {
4625
73
            uint32_t cur_hash = sub_str_hash(pattern.data + i, gram_num);
4626
73
            if (!pattern_map.contains(cur_hash)) {
4627
43
                pattern_map[cur_hash] = found_in_pattern;
4628
43
                pattern_count++;
4629
43
            }
4630
73
        }
4631
14
        return pattern_count;
4632
14
    }
4633
4634
    std::pair<size_t, size_t> get_text_set(StringRef& text, int gram_num, NgramMap& pattern_map,
4635
17
                                           std::vector<uint32_t>& restore_map) const {
4636
17
        restore_map.clear();
4637
        //intersection_count indicates a substring both in pattern and text.
4638
17
        size_t text_count = 0, intersection_count = 0;
4639
117
        for (int i = 0; i + gram_num <= text.size; i++) {
4640
100
            uint32_t cur_hash = sub_str_hash(text.data + i, gram_num);
4641
100
            auto& val = pattern_map[cur_hash];
4642
100
            if (val == not_found) {
4643
26
                val ^= found_in_text;
4644
26
                DCHECK(val == found_in_text);
4645
                // only found in text
4646
26
                text_count++;
4647
26
                restore_map.push_back(cur_hash);
4648
74
            } else if (val == found_in_pattern) {
4649
39
                val ^= found_in_text;
4650
39
                DCHECK(val == found_in_pattern_and_text);
4651
                // found in text and pattern
4652
39
                text_count++;
4653
39
                intersection_count++;
4654
39
                restore_map.push_back(cur_hash);
4655
39
            }
4656
100
        }
4657
        // Restore the pattern_map.
4658
65
        for (auto& restore_hash : restore_map) {
4659
65
            pattern_map[restore_hash] ^= found_in_text;
4660
65
        }
4661
4662
17
        return {text_count, intersection_count};
4663
17
    }
4664
};
4665
4666
class FunctionTranslate : public IFunction {
4667
public:
4668
    static constexpr auto name = "translate";
4669
    using AsciiMap = std::array<UInt8, 128>;
4670
    constexpr static UInt8 DELETE_CHAR = 255; // 255 means delete this char
4671
97
    static FunctionPtr create() { return std::make_shared<FunctionTranslate>(); }
4672
1
    String get_name() const override { return name; }
4673
88
    size_t get_number_of_arguments() const override { return 3; }
4674
4675
88
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4676
88
        return std::make_shared<DataTypeString>();
4677
88
    };
4678
4679
8
    DataTypes get_variadic_argument_types_impl() const override {
4680
8
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
4681
8
                std::make_shared<DataTypeString>()};
4682
8
    }
4683
4684
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4685
161
                        uint32_t result, size_t input_rows_count) const override {
4686
161
        CHECK_EQ(arguments.size(), 3);
4687
161
        auto col_res = ColumnString::create();
4688
161
        bool col_const[3];
4689
161
        ColumnPtr argument_columns[3];
4690
644
        for (int i = 0; i < 3; ++i) {
4691
483
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
4692
483
        }
4693
161
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
4694
20
                                                     *block.get_by_position(arguments[0]).column)
4695
20
                                                     .convert_to_full_column()
4696
161
                                           : block.get_by_position(arguments[0]).column;
4697
161
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
4698
4699
161
        const auto* col_source = assert_cast<const ColumnString*>(argument_columns[0].get());
4700
161
        const auto* col_from = assert_cast<const ColumnString*>(argument_columns[1].get());
4701
161
        const auto* col_to = assert_cast<const ColumnString*>(argument_columns[2].get());
4702
4703
161
        bool is_ascii = col_source->is_ascii() && col_from->is_ascii() && col_to->is_ascii();
4704
161
        auto impl_vectors = impl_vectors_utf8<false>;
4705
161
        if (col_const[1] && col_const[2] && is_ascii) {
4706
34
            impl_vectors = impl_vectors_ascii<true>;
4707
127
        } else if (col_const[1] && col_const[2]) {
4708
1
            impl_vectors = impl_vectors_utf8<true>;
4709
126
        } else if (is_ascii) {
4710
88
            impl_vectors = impl_vectors_ascii<false>;
4711
88
        }
4712
161
        impl_vectors(col_source, col_from, col_to, col_res.get());
4713
161
        block.get_by_position(result).column = std::move(col_res);
4714
161
        return Status::OK();
4715
161
    }
4716
4717
private:
4718
    template <bool IsConst>
4719
    static void impl_vectors_ascii(const ColumnString* col_source, const ColumnString* col_from,
4720
122
                                   const ColumnString* col_to, ColumnString* col_res) {
4721
122
        auto& res_chars = col_res->get_chars();
4722
122
        auto& res_offsets = col_res->get_offsets();
4723
122
        res_chars.reserve(col_source->get_chars().size());
4724
122
        res_offsets.reserve(col_source->get_offsets().size());
4725
122
        DCHECK_EQ(col_res->size(), 0);
4726
122
        AsciiMap map;
4727
122
        if (IsConst) {
4728
34
            const auto& from_str = col_from->get_data_at(0);
4729
34
            const auto& to_str = col_to->get_data_at(0);
4730
34
            if (!build_translate_map_ascii(map, from_str, to_str)) {
4731
                // if the map is not need delete char, we can directly copy the source string,then use map to translate
4732
24
                res_offsets.insert(col_source->get_offsets().begin(),
4733
24
                                   col_source->get_offsets().end());
4734
24
                res_chars.insert(col_source->get_chars().begin(), col_source->get_chars().end());
4735
214
                for (int i = 0; i < res_chars.size(); ++i) {
4736
190
                    res_chars[i] = map[res_chars[i]]; // translate the chars
4737
190
                }
4738
24
                return; // no need to translate
4739
24
            }
4740
34
        }
4741
4742
98
        auto res_size = 0;
4743
98
        auto* begin_data = col_res->get_chars().data();
4744
216
        for (size_t i = 0; i < col_source->size(); ++i) {
4745
118
            const auto& source_str = col_source->get_data_at(i);
4746
118
            if (!IsConst) {
4747
104
                const auto& from_str = col_from->get_data_at(i);
4748
104
                const auto& to_str = col_to->get_data_at(i);
4749
104
                build_translate_map_ascii(map, from_str, to_str);
4750
104
            }
4751
118
            auto* dst_data = begin_data + res_size;
4752
118
            res_size += translate_ascii(source_str, map, dst_data);
4753
4754
118
            res_offsets.push_back(res_size);
4755
118
        }
4756
98
        DCHECK_GE(res_chars.capacity(), res_size);
4757
98
        res_chars.resize(res_size);
4758
98
    }
_ZN5doris17FunctionTranslate18impl_vectors_asciiILb1EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
Line
Count
Source
4720
34
                                   const ColumnString* col_to, ColumnString* col_res) {
4721
34
        auto& res_chars = col_res->get_chars();
4722
34
        auto& res_offsets = col_res->get_offsets();
4723
34
        res_chars.reserve(col_source->get_chars().size());
4724
34
        res_offsets.reserve(col_source->get_offsets().size());
4725
34
        DCHECK_EQ(col_res->size(), 0);
4726
34
        AsciiMap map;
4727
34
        if (IsConst) {
4728
34
            const auto& from_str = col_from->get_data_at(0);
4729
34
            const auto& to_str = col_to->get_data_at(0);
4730
34
            if (!build_translate_map_ascii(map, from_str, to_str)) {
4731
                // if the map is not need delete char, we can directly copy the source string,then use map to translate
4732
24
                res_offsets.insert(col_source->get_offsets().begin(),
4733
24
                                   col_source->get_offsets().end());
4734
24
                res_chars.insert(col_source->get_chars().begin(), col_source->get_chars().end());
4735
214
                for (int i = 0; i < res_chars.size(); ++i) {
4736
190
                    res_chars[i] = map[res_chars[i]]; // translate the chars
4737
190
                }
4738
24
                return; // no need to translate
4739
24
            }
4740
34
        }
4741
4742
10
        auto res_size = 0;
4743
10
        auto* begin_data = col_res->get_chars().data();
4744
24
        for (size_t i = 0; i < col_source->size(); ++i) {
4745
14
            const auto& source_str = col_source->get_data_at(i);
4746
14
            if (!IsConst) {
4747
0
                const auto& from_str = col_from->get_data_at(i);
4748
0
                const auto& to_str = col_to->get_data_at(i);
4749
0
                build_translate_map_ascii(map, from_str, to_str);
4750
0
            }
4751
14
            auto* dst_data = begin_data + res_size;
4752
14
            res_size += translate_ascii(source_str, map, dst_data);
4753
4754
14
            res_offsets.push_back(res_size);
4755
14
        }
4756
        DCHECK_GE(res_chars.capacity(), res_size);
4757
10
        res_chars.resize(res_size);
4758
10
    }
_ZN5doris17FunctionTranslate18impl_vectors_asciiILb0EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
Line
Count
Source
4720
88
                                   const ColumnString* col_to, ColumnString* col_res) {
4721
88
        auto& res_chars = col_res->get_chars();
4722
88
        auto& res_offsets = col_res->get_offsets();
4723
88
        res_chars.reserve(col_source->get_chars().size());
4724
88
        res_offsets.reserve(col_source->get_offsets().size());
4725
88
        DCHECK_EQ(col_res->size(), 0);
4726
88
        AsciiMap map;
4727
88
        if (IsConst) {
4728
0
            const auto& from_str = col_from->get_data_at(0);
4729
0
            const auto& to_str = col_to->get_data_at(0);
4730
0
            if (!build_translate_map_ascii(map, from_str, to_str)) {
4731
                // if the map is not need delete char, we can directly copy the source string,then use map to translate
4732
0
                res_offsets.insert(col_source->get_offsets().begin(),
4733
0
                                   col_source->get_offsets().end());
4734
0
                res_chars.insert(col_source->get_chars().begin(), col_source->get_chars().end());
4735
0
                for (int i = 0; i < res_chars.size(); ++i) {
4736
0
                    res_chars[i] = map[res_chars[i]]; // translate the chars
4737
0
                }
4738
0
                return; // no need to translate
4739
0
            }
4740
0
        }
4741
4742
88
        auto res_size = 0;
4743
88
        auto* begin_data = col_res->get_chars().data();
4744
192
        for (size_t i = 0; i < col_source->size(); ++i) {
4745
104
            const auto& source_str = col_source->get_data_at(i);
4746
104
            if (!IsConst) {
4747
104
                const auto& from_str = col_from->get_data_at(i);
4748
104
                const auto& to_str = col_to->get_data_at(i);
4749
104
                build_translate_map_ascii(map, from_str, to_str);
4750
104
            }
4751
104
            auto* dst_data = begin_data + res_size;
4752
104
            res_size += translate_ascii(source_str, map, dst_data);
4753
4754
104
            res_offsets.push_back(res_size);
4755
104
        }
4756
        DCHECK_GE(res_chars.capacity(), res_size);
4757
88
        res_chars.resize(res_size);
4758
88
    }
4759
4760
    // return true if no need delete char
4761
    bool static build_translate_map_ascii(AsciiMap& map, const StringRef& from_str,
4762
138
                                          const StringRef& to_str) {
4763
17.8k
        for (size_t i = 0; i < map.size(); ++i) {
4764
17.6k
            map[i] = i; // initialize map to identity
4765
17.6k
        }
4766
138
        std::array<UInt8, 128> set_map {0};
4767
138
        const auto min_size = std::min(from_str.size, to_str.size);
4768
        // all ascii characters are in the range [0, 127]
4769
476
        for (size_t i = 0; i < min_size; ++i) {
4770
338
            auto from_char = from_str.data[i];
4771
338
            auto to_char = to_str.data[i];
4772
338
            if (set_map[from_char] == 0) {
4773
243
                set_map[from_char] = 1;
4774
243
                map[from_char] = to_char;
4775
243
            }
4776
338
        }
4777
4778
138
        bool need_delete_char = false;
4779
4780
207
        for (size_t i = min_size; i < from_str.size; ++i) {
4781
69
            auto from_char = from_str.data[i];
4782
69
            if (set_map[from_char] == 0) {
4783
57
                set_map[from_char] = 1;
4784
57
                map[from_char] = DELETE_CHAR; // delete this char
4785
57
                need_delete_char = true;
4786
57
            }
4787
69
        }
4788
138
        return need_delete_char;
4789
138
    }
4790
4791
118
    static size_t translate_ascii(const StringRef& source_str, AsciiMap& map, UInt8* dst_data) {
4792
118
        auto* begin_data = dst_data;
4793
640
        for (size_t i = 0; i < source_str.size; ++i) {
4794
522
            auto c = source_str.data[i];
4795
522
            if (map[c] == DELETE_CHAR) {
4796
35
                continue; // delete this char
4797
35
            }
4798
487
            *dst_data++ = map[c];
4799
487
        }
4800
118
        return dst_data - begin_data;
4801
118
    }
4802
4803
    template <bool IsConst>
4804
    static void impl_vectors_utf8(const ColumnString* col_source, const ColumnString* col_from,
4805
39
                                  const ColumnString* col_to, ColumnString* col_res) {
4806
39
        col_res->get_chars().reserve(col_source->get_chars().size());
4807
39
        col_res->get_offsets().reserve(col_source->get_offsets().size());
4808
39
        std::unordered_map<std::string_view, std::string_view> translate_map;
4809
39
        if (IsConst) {
4810
1
            const auto& from_str = col_from->get_data_at(0);
4811
1
            const auto& to_str = col_to->get_data_at(0);
4812
1
            translate_map =
4813
1
                    build_translate_map_utf8(from_str.to_string_view(), to_str.to_string_view());
4814
1
        }
4815
372
        for (size_t i = 0; i < col_source->size(); ++i) {
4816
333
            const auto& source_str = col_source->get_data_at(i);
4817
333
            if (!IsConst) {
4818
332
                const auto& from_str = col_from->get_data_at(i);
4819
332
                const auto& to_str = col_to->get_data_at(i);
4820
332
                translate_map = build_translate_map_utf8(from_str.to_string_view(),
4821
332
                                                         to_str.to_string_view());
4822
332
            }
4823
333
            auto translated_str = translate_utf8(source_str.to_string_view(), translate_map);
4824
333
            col_res->insert_data(translated_str.data(), translated_str.size());
4825
333
        }
4826
39
    }
_ZN5doris17FunctionTranslate17impl_vectors_utf8ILb0EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
Line
Count
Source
4805
38
                                  const ColumnString* col_to, ColumnString* col_res) {
4806
38
        col_res->get_chars().reserve(col_source->get_chars().size());
4807
38
        col_res->get_offsets().reserve(col_source->get_offsets().size());
4808
38
        std::unordered_map<std::string_view, std::string_view> translate_map;
4809
38
        if (IsConst) {
4810
0
            const auto& from_str = col_from->get_data_at(0);
4811
0
            const auto& to_str = col_to->get_data_at(0);
4812
0
            translate_map =
4813
0
                    build_translate_map_utf8(from_str.to_string_view(), to_str.to_string_view());
4814
0
        }
4815
370
        for (size_t i = 0; i < col_source->size(); ++i) {
4816
332
            const auto& source_str = col_source->get_data_at(i);
4817
332
            if (!IsConst) {
4818
332
                const auto& from_str = col_from->get_data_at(i);
4819
332
                const auto& to_str = col_to->get_data_at(i);
4820
332
                translate_map = build_translate_map_utf8(from_str.to_string_view(),
4821
332
                                                         to_str.to_string_view());
4822
332
            }
4823
332
            auto translated_str = translate_utf8(source_str.to_string_view(), translate_map);
4824
332
            col_res->insert_data(translated_str.data(), translated_str.size());
4825
332
        }
4826
38
    }
_ZN5doris17FunctionTranslate17impl_vectors_utf8ILb1EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
Line
Count
Source
4805
1
                                  const ColumnString* col_to, ColumnString* col_res) {
4806
1
        col_res->get_chars().reserve(col_source->get_chars().size());
4807
1
        col_res->get_offsets().reserve(col_source->get_offsets().size());
4808
1
        std::unordered_map<std::string_view, std::string_view> translate_map;
4809
1
        if (IsConst) {
4810
1
            const auto& from_str = col_from->get_data_at(0);
4811
1
            const auto& to_str = col_to->get_data_at(0);
4812
1
            translate_map =
4813
1
                    build_translate_map_utf8(from_str.to_string_view(), to_str.to_string_view());
4814
1
        }
4815
2
        for (size_t i = 0; i < col_source->size(); ++i) {
4816
1
            const auto& source_str = col_source->get_data_at(i);
4817
1
            if (!IsConst) {
4818
0
                const auto& from_str = col_from->get_data_at(i);
4819
0
                const auto& to_str = col_to->get_data_at(i);
4820
0
                translate_map = build_translate_map_utf8(from_str.to_string_view(),
4821
0
                                                         to_str.to_string_view());
4822
0
            }
4823
1
            auto translated_str = translate_utf8(source_str.to_string_view(), translate_map);
4824
1
            col_res->insert_data(translated_str.data(), translated_str.size());
4825
1
        }
4826
1
    }
4827
4828
    static std::unordered_map<std::string_view, std::string_view> build_translate_map_utf8(
4829
333
            const std::string_view& from_str, const std::string_view& to_str) {
4830
333
        std::unordered_map<std::string_view, std::string_view> translate_map;
4831
1.78k
        for (size_t i = 0, from_char_size = 0, j = 0, to_char_size = 0; i < from_str.size();
4832
1.44k
             i += from_char_size, j += to_char_size) {
4833
1.44k
            from_char_size = get_utf8_byte_length(from_str[i]);
4834
1.44k
            to_char_size = j < to_str.size() ? get_utf8_byte_length(to_str[j]) : 0;
4835
1.44k
            auto from_char = from_str.substr(i, from_char_size);
4836
1.44k
            if (translate_map.find(from_char) == translate_map.end()) {
4837
792
                translate_map[from_char] =
4838
792
                        j < to_str.size() ? to_str.substr(j, to_char_size) : std::string_view();
4839
792
            }
4840
1.44k
        }
4841
333
        return translate_map;
4842
333
    }
4843
4844
    static std::string translate_utf8(
4845
            const std::string_view& source_str,
4846
333
            std::unordered_map<std::string_view, std::string_view>& translate_map) {
4847
333
        std::string result;
4848
333
        result.reserve(source_str.size());
4849
1.96k
        for (size_t i = 0, char_size = 0; i < source_str.size(); i += char_size) {
4850
1.63k
            char_size = get_utf8_byte_length(source_str[i]);
4851
1.63k
            auto c = source_str.substr(i, char_size);
4852
1.63k
            if (translate_map.find(c) != translate_map.end()) {
4853
255
                if (!translate_map[c].empty()) {
4854
159
                    result.append(translate_map[c]);
4855
159
                }
4856
1.37k
            } else {
4857
1.37k
                result.append(c);
4858
1.37k
            }
4859
1.63k
        }
4860
333
        return result;
4861
333
    }
4862
};
4863
4864
/// xpath_string(xml, xpath) -> String
4865
/// Returns the text content of the first node that matches the XPath expression.
4866
/// Returns NULL if either xml or xpath is NULL.
4867
/// Returns empty string if the XPath expression matches no nodes.
4868
/// The text content includes the node and all its descendants.
4869
/// Example:
4870
///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[1]') = 'b1'
4871
///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[2]') = 'b2'
4872
///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/c') = ''
4873
///   xpath_string('invalid xml', '/a/b[1]') -> error (InvalidArgument: failed to parse XML string)
4874
///   xpath_string(NULL, '/a/b[1]') = NULL
4875
///   xpath_string('<a><b>b1</b><b>b2</b></a>', NULL) = NULL
4876
class FunctionXPathString : public IFunction {
4877
public:
4878
    static constexpr auto name = "xpath_string";
4879
173
    static FunctionPtr create() { return std::make_shared<FunctionXPathString>(); }
4880
1
    String get_name() const override { return name; }
4881
164
    size_t get_number_of_arguments() const override { return 2; }
4882
164
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4883
164
        return make_nullable(std::make_shared<DataTypeString>());
4884
164
    }
4885
4886
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4887
246
                        uint32_t result, size_t input_rows_count) const override {
4888
246
        CHECK_EQ(arguments.size(), 2);
4889
246
        auto col_res = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create());
4890
246
        const auto& [left_col, left_const] =
4891
246
                unpack_if_const(block.get_by_position(arguments[0]).column);
4892
246
        const auto& [right_col, right_const] =
4893
246
                unpack_if_const(block.get_by_position(arguments[1]).column);
4894
246
        const auto& xml_col = *assert_cast<const ColumnString*>(left_col.get());
4895
246
        const auto& xpath_col = *assert_cast<const ColumnString*>(right_col.get());
4896
4897
246
        Status status;
4898
246
        if (left_const && right_const) {
4899
0
            status = execute_vector<true, true>(input_rows_count, xml_col, xpath_col, *col_res);
4900
246
        } else if (left_const) {
4901
42
            status = execute_vector<true, false>(input_rows_count, xml_col, xpath_col, *col_res);
4902
204
        } else if (right_const) {
4903
51
            status = execute_vector<false, true>(input_rows_count, xml_col, xpath_col, *col_res);
4904
153
        } else {
4905
153
            status = execute_vector<false, false>(input_rows_count, xml_col, xpath_col, *col_res);
4906
153
        }
4907
246
        if (!status.ok()) {
4908
1
            return status;
4909
1
        }
4910
4911
245
        block.get_by_position(result).column = std::move(col_res);
4912
245
        return Status::OK();
4913
246
    }
4914
4915
private:
4916
331
    static Status parse_xml(const StringRef& xml_str, pugi::xml_document& xml_doc) {
4917
331
        pugi::xml_parse_result result = xml_doc.load_buffer(xml_str.data, xml_str.size);
4918
331
        if (!result) {
4919
1
            return Status::InvalidArgument("Function {} failed to parse XML string: {}", name,
4920
1
                                           result.description());
4921
1
        }
4922
330
        return Status::OK();
4923
331
    }
4924
4925
340
    static Status build_xpath_query(const StringRef& xpath_str, pugi::xpath_query& xpath_query) {
4926
        // xpath_query will throws xpath_exception on compilation errors.
4927
340
        try {
4928
            // NOTE!!!: don't use to_string_view(), because xpath_str maybe not null-terminated
4929
340
            xpath_query = pugi::xpath_query(xpath_str.to_string().c_str());
4930
340
        } catch (const pugi::xpath_exception& e) {
4931
0
            return Status::InvalidArgument("Function {} failed to build XPath query: {}", name,
4932
0
                                           e.what());
4933
0
        }
4934
340
        return Status::OK();
4935
340
    }
4936
4937
    template <bool left_const, bool right_const>
4938
    static Status execute_vector(const size_t input_rows_count, const ColumnString& xml_col,
4939
246
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
4940
246
        pugi::xml_document xml_doc;
4941
246
        pugi::xpath_query xpath_query;
4942
        // first check right_const, because we want to check empty input first
4943
246
        if constexpr (right_const) {
4944
51
            auto xpath_str = xpath_col.get_data_at(0);
4945
51
            if (xpath_str.empty()) {
4946
                // should return null if xpath_str is empty
4947
1
                res_col.insert_many_defaults(input_rows_count);
4948
1
                return Status::OK();
4949
1
            }
4950
50
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4951
50
        }
4952
50
        if constexpr (left_const) {
4953
42
            auto xml_str = xml_col.get_data_at(0);
4954
42
            if (xml_str.empty()) {
4955
                // should return null if xml_str is empty
4956
1
                res_col.insert_many_defaults(input_rows_count);
4957
1
                return Status::OK();
4958
1
            }
4959
41
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4960
41
        }
4961
4962
633
        for (size_t i = 0; i < input_rows_count; ++i) {
4963
388
            if constexpr (!right_const) {
4964
308
                auto xpath_str = xpath_col.get_data_at(i);
4965
308
                if (xpath_str.empty()) {
4966
                    // should return null if xpath_str is empty
4967
18
                    res_col.insert_default();
4968
18
                    continue;
4969
18
                }
4970
290
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4971
290
            }
4972
327
            if constexpr (!left_const) {
4973
327
                auto xml_str = xml_col.get_data_at(i);
4974
327
                if (xml_str.empty()) {
4975
                    // should return null if xml_str is empty
4976
20
                    res_col.insert_default();
4977
20
                    continue;
4978
20
                }
4979
307
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4980
307
            }
4981
306
            std::string text;
4982
388
            try {
4983
388
                text = xpath_query.evaluate_string(xml_doc);
4984
388
            } catch (const pugi::xpath_exception& e) {
4985
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
4986
0
                                               e.what());
4987
0
            }
4988
349
            res_col.insert_data(text.data(), text.size());
4989
349
        }
4990
245
        return Status::OK();
4991
246
    }
Unexecuted instantiation: _ZN5doris19FunctionXPathString14execute_vectorILb1ELb1EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
_ZN5doris19FunctionXPathString14execute_vectorILb1ELb0EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
Line
Count
Source
4939
42
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
4940
42
        pugi::xml_document xml_doc;
4941
42
        pugi::xpath_query xpath_query;
4942
        // first check right_const, because we want to check empty input first
4943
        if constexpr (right_const) {
4944
            auto xpath_str = xpath_col.get_data_at(0);
4945
            if (xpath_str.empty()) {
4946
                // should return null if xpath_str is empty
4947
                res_col.insert_many_defaults(input_rows_count);
4948
                return Status::OK();
4949
            }
4950
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4951
        }
4952
42
        if constexpr (left_const) {
4953
42
            auto xml_str = xml_col.get_data_at(0);
4954
42
            if (xml_str.empty()) {
4955
                // should return null if xml_str is empty
4956
1
                res_col.insert_many_defaults(input_rows_count);
4957
1
                return Status::OK();
4958
1
            }
4959
41
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4960
41
        }
4961
4962
103
        for (size_t i = 0; i < input_rows_count; ++i) {
4963
61
            if constexpr (!right_const) {
4964
61
                auto xpath_str = xpath_col.get_data_at(i);
4965
61
                if (xpath_str.empty()) {
4966
                    // should return null if xpath_str is empty
4967
1
                    res_col.insert_default();
4968
1
                    continue;
4969
1
                }
4970
60
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4971
60
            }
4972
            if constexpr (!left_const) {
4973
                auto xml_str = xml_col.get_data_at(i);
4974
                if (xml_str.empty()) {
4975
                    // should return null if xml_str is empty
4976
                    res_col.insert_default();
4977
                    continue;
4978
                }
4979
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4980
            }
4981
61
            std::string text;
4982
61
            try {
4983
61
                text = xpath_query.evaluate_string(xml_doc);
4984
61
            } catch (const pugi::xpath_exception& e) {
4985
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
4986
0
                                               e.what());
4987
0
            }
4988
60
            res_col.insert_data(text.data(), text.size());
4989
60
        }
4990
42
        return Status::OK();
4991
42
    }
_ZN5doris19FunctionXPathString14execute_vectorILb0ELb1EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
Line
Count
Source
4939
51
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
4940
51
        pugi::xml_document xml_doc;
4941
51
        pugi::xpath_query xpath_query;
4942
        // first check right_const, because we want to check empty input first
4943
51
        if constexpr (right_const) {
4944
51
            auto xpath_str = xpath_col.get_data_at(0);
4945
51
            if (xpath_str.empty()) {
4946
                // should return null if xpath_str is empty
4947
1
                res_col.insert_many_defaults(input_rows_count);
4948
1
                return Status::OK();
4949
1
            }
4950
50
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4951
50
        }
4952
        if constexpr (left_const) {
4953
            auto xml_str = xml_col.get_data_at(0);
4954
            if (xml_str.empty()) {
4955
                // should return null if xml_str is empty
4956
                res_col.insert_many_defaults(input_rows_count);
4957
                return Status::OK();
4958
            }
4959
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4960
        }
4961
4962
131
        for (size_t i = 0; i < input_rows_count; ++i) {
4963
            if constexpr (!right_const) {
4964
                auto xpath_str = xpath_col.get_data_at(i);
4965
                if (xpath_str.empty()) {
4966
                    // should return null if xpath_str is empty
4967
                    res_col.insert_default();
4968
                    continue;
4969
                }
4970
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4971
            }
4972
80
            if constexpr (!left_const) {
4973
80
                auto xml_str = xml_col.get_data_at(i);
4974
80
                if (xml_str.empty()) {
4975
                    // should return null if xml_str is empty
4976
5
                    res_col.insert_default();
4977
5
                    continue;
4978
5
                }
4979
75
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4980
75
            }
4981
75
            std::string text;
4982
80
            try {
4983
80
                text = xpath_query.evaluate_string(xml_doc);
4984
80
            } catch (const pugi::xpath_exception& e) {
4985
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
4986
0
                                               e.what());
4987
0
            }
4988
75
            res_col.insert_data(text.data(), text.size());
4989
75
        }
4990
51
        return Status::OK();
4991
51
    }
_ZN5doris19FunctionXPathString14execute_vectorILb0ELb0EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
Line
Count
Source
4939
153
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
4940
153
        pugi::xml_document xml_doc;
4941
153
        pugi::xpath_query xpath_query;
4942
        // first check right_const, because we want to check empty input first
4943
        if constexpr (right_const) {
4944
            auto xpath_str = xpath_col.get_data_at(0);
4945
            if (xpath_str.empty()) {
4946
                // should return null if xpath_str is empty
4947
                res_col.insert_many_defaults(input_rows_count);
4948
                return Status::OK();
4949
            }
4950
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4951
        }
4952
        if constexpr (left_const) {
4953
            auto xml_str = xml_col.get_data_at(0);
4954
            if (xml_str.empty()) {
4955
                // should return null if xml_str is empty
4956
                res_col.insert_many_defaults(input_rows_count);
4957
                return Status::OK();
4958
            }
4959
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4960
        }
4961
4962
399
        for (size_t i = 0; i < input_rows_count; ++i) {
4963
247
            if constexpr (!right_const) {
4964
247
                auto xpath_str = xpath_col.get_data_at(i);
4965
247
                if (xpath_str.empty()) {
4966
                    // should return null if xpath_str is empty
4967
17
                    res_col.insert_default();
4968
17
                    continue;
4969
17
                }
4970
230
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4971
230
            }
4972
247
            if constexpr (!left_const) {
4973
247
                auto xml_str = xml_col.get_data_at(i);
4974
247
                if (xml_str.empty()) {
4975
                    // should return null if xml_str is empty
4976
15
                    res_col.insert_default();
4977
15
                    continue;
4978
15
                }
4979
232
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4980
232
            }
4981
231
            std::string text;
4982
247
            try {
4983
247
                text = xpath_query.evaluate_string(xml_doc);
4984
247
            } catch (const pugi::xpath_exception& e) {
4985
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
4986
0
                                               e.what());
4987
0
            }
4988
214
            res_col.insert_data(text.data(), text.size());
4989
214
        }
4990
152
        return Status::OK();
4991
153
    }
4992
};
4993
4994
class MakeSetImpl {
4995
public:
4996
    static constexpr auto name = "make_set";
4997
4998
0
    static size_t get_number_of_arguments() { return 0; }
4999
27
    static bool is_variadic() { return true; }
5000
26
    static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
5001
26
        if (arguments[0].get()->is_nullable()) {
5002
12
            return make_nullable(std::make_shared<DataTypeString>());
5003
12
        }
5004
14
        return std::make_shared<DataTypeString>();
5005
26
    }
5006
5007
    static bool is_return_nullable(bool has_nullable,
5008
26
                                   const std::vector<ColumnWithConstAndNullMap>& cols_info) {
5009
26
        return cols_info[0].null_map != nullptr;
5010
26
    }
5011
5012
    static bool execute_const_null(ColumnString::MutablePtr& res_col,
5013
                                   PaddedPODArray<UInt8>& res_null_map_data,
5014
2
                                   size_t input_rows_count, size_t null_index) {
5015
2
        if (null_index == 1) {
5016
0
            res_col->insert_many_defaults(input_rows_count);
5017
0
            res_null_map_data.assign(input_rows_count, (UInt8)1);
5018
0
            return true;
5019
0
        }
5020
2
        return false;
5021
2
    }
5022
5023
    static void execute(const std::vector<ColumnWithConstAndNullMap>& column_infos,
5024
                        ColumnString::MutablePtr& res_col, PaddedPODArray<UInt8>& res_null_map_data,
5025
26
                        size_t input_rows_count) {
5026
26
        static constexpr char SEPARATOR = ',';
5027
26
        const auto& bit_data =
5028
26
                assert_cast<const ColumnInt64&>(*column_infos[0].nested_col).get_data();
5029
26
        std::vector<const ColumnString*> str_cols(column_infos.size());
5030
216
        for (size_t i = 1; i < column_infos.size(); ++i) {
5031
190
            str_cols[i] = assert_cast<const ColumnString*>(column_infos[i].nested_col);
5032
190
        }
5033
5034
182
        for (size_t row = 0; row < input_rows_count; ++row) {
5035
156
            if (column_infos[0].is_null_at(row)) {
5036
10
                res_col->insert_default();
5037
10
                res_null_map_data[row] = 1;
5038
10
                continue;
5039
10
            }
5040
5041
146
            uint64_t bit = bit_data[column_infos[0].is_const ? 0 : row];
5042
146
            uint64_t col_pos = __builtin_ffsll(bit);
5043
146
            ColumnString::Chars data;
5044
427
            while (col_pos != 0 && col_pos < column_infos.size() && bit != 0) {
5045
281
                if (!column_infos[col_pos].is_null_at(row)) {
5046
                    /* Here insert `str,` directly to support the case below:
5047
                     * SELECT MAKE_SET(3, '', 'a');
5048
                     * the exception result should be ',a'.
5049
                     */
5050
243
                    auto s_ref = str_cols[col_pos]->get_data_at(
5051
243
                            column_infos[col_pos].is_const ? 0 : row);
5052
243
                    data.insert(s_ref.data, s_ref.data + s_ref.size);
5053
243
                    data.push_back(SEPARATOR);
5054
243
                }
5055
281
                bit &= ~(1ULL << (col_pos - 1));
5056
281
                col_pos = __builtin_ffsll(bit);
5057
281
            }
5058
            // remove the last ','
5059
146
            if (!data.empty()) {
5060
132
                data.pop_back();
5061
132
            }
5062
146
            res_col->insert_data(reinterpret_cast<const char*>(data.data()), data.size());
5063
146
        }
5064
26
    }
5065
};
5066
5067
class FunctionExportSet : public IFunction {
5068
public:
5069
    static constexpr auto name = "export_set";
5070
60
    static FunctionPtr create() { return std::make_shared<FunctionExportSet>(); }
5071
0
    String get_name() const override { return name; }
5072
0
    size_t get_number_of_arguments() const override { return 0; }
5073
52
    bool is_variadic() const override { return true; }
5074
51
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
5075
51
        return std::make_shared<DataTypeString>();
5076
51
    }
5077
5078
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
5079
51
                        uint32_t result, size_t input_rows_count) const override {
5080
51
        auto res_col = ColumnString::create();
5081
5082
51
        const size_t arg_size = arguments.size();
5083
51
        bool col_const[5];
5084
51
        ColumnPtr arg_cols[5];
5085
51
        bool all_const = true;
5086
183
        for (int i = 1; i < arg_size; ++i) {
5087
132
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
5088
132
            all_const = all_const && col_const[i];
5089
132
        }
5090
51
        std::tie(arg_cols[0], col_const[0]) =
5091
51
                unpack_if_const(block.get_by_position(arguments[0]).column);
5092
51
        if (arg_size == 3) {
5093
35
            default_preprocess_parameter_columns(arg_cols, col_const, {1, 2}, block, arguments);
5094
35
        } else if (arg_size == 4) {
5095
2
            default_preprocess_parameter_columns(arg_cols, col_const, {1, 2, 3}, block, arguments);
5096
14
        } else if (arg_size == 5) {
5097
14
            default_preprocess_parameter_columns(arg_cols, col_const, {1, 2, 3, 4}, block,
5098
14
                                                 arguments);
5099
14
        }
5100
5101
51
        const auto* bit_col = assert_cast<const ColumnInt128*>(arg_cols[0].get());
5102
51
        const auto* on_col = assert_cast<const ColumnString*>(arg_cols[1].get());
5103
51
        const auto* off_col = assert_cast<const ColumnString*>(arg_cols[2].get());
5104
51
        const ColumnString* sep_col = nullptr;
5105
51
        const ColumnInt32* num_bits_col = nullptr;
5106
51
        if (arg_size > 3) {
5107
16
            sep_col = assert_cast<const ColumnString*>(arg_cols[3].get());
5108
16
            if (arg_size == 5) {
5109
14
                num_bits_col = assert_cast<const ColumnInt32*>(arg_cols[4].get());
5110
14
            }
5111
16
        }
5112
5113
210
        for (size_t i = 0; i < input_rows_count; ++i) {
5114
159
            uint64_t bit =
5115
159
                    check_and_get_bit(bit_col->get_element(index_check_const(i, col_const[0])));
5116
5117
159
            size_t idx_for_args = all_const ? 0 : i;
5118
159
            StringRef on = on_col->get_data_at(idx_for_args);
5119
159
            StringRef off = off_col->get_data_at(idx_for_args);
5120
159
            StringRef separator(",", 1);
5121
159
            int8_t num_of_bits = 64;
5122
5123
159
            if (arg_size > 3) {
5124
100
                separator = sep_col->get_data_at(idx_for_args);
5125
100
                if (arg_size == 5) {
5126
74
                    num_of_bits =
5127
74
                            check_and_get_num_of_bits(num_bits_col->get_element(idx_for_args));
5128
74
                }
5129
100
            }
5130
5131
159
            execute_single(bit, on, off, separator, num_of_bits, *res_col);
5132
159
        }
5133
51
        block.replace_by_position(result, std::move(res_col));
5134
51
        return Status::OK();
5135
51
    }
5136
5137
private:
5138
    /* The valid range of the input `bit` parameter should be [-2^63, 2^64 - 1]
5139
     * If it exceeds this range, the MAX/MIN values of the signed 64-bit integer are used for calculation
5140
     * This behavior is consistent with MySQL.
5141
     */
5142
159
    uint64_t check_and_get_bit(__int128 col_bit_val) const {
5143
159
        if (col_bit_val > ULLONG_MAX) {
5144
0
            return LLONG_MAX;
5145
159
        } else if (col_bit_val < LLONG_MIN) {
5146
0
            return LLONG_MIN;
5147
0
        }
5148
159
        return static_cast<uint64_t>(col_bit_val);
5149
159
    }
5150
5151
    // If the input value is not in the range [0, 64], return default value 64
5152
74
    int8_t check_and_get_num_of_bits(int32_t col_num_of_bits_val) const {
5153
74
        if (col_num_of_bits_val >= 0 && col_num_of_bits_val <= 64) {
5154
69
            return static_cast<int8_t>(col_num_of_bits_val);
5155
69
        }
5156
5
        return 64;
5157
74
    }
5158
5159
    void execute_single(uint64_t bit, const StringRef& on, const StringRef& off,
5160
                        const StringRef& separator, int8_t num_of_bits,
5161
159
                        ColumnString& res_col) const {
5162
159
        ColumnString::Chars data;
5163
159
        data.reserve(std::max(on.size, off.size) * num_of_bits +
5164
159
                     separator.size * (num_of_bits - 1));
5165
5166
4.29k
        while (bit && num_of_bits) {
5167
4.13k
            if (bit & 1) {
5168
2.57k
                data.insert(on.data, on.data + on.size);
5169
2.57k
            } else {
5170
1.55k
                data.insert(off.data, off.data + off.size);
5171
1.55k
            }
5172
4.13k
            bit >>= 1;
5173
4.13k
            if (--num_of_bits) {
5174
4.08k
                data.insert(separator.data, separator.data + separator.size);
5175
4.08k
            }
5176
4.13k
        }
5177
5178
159
        if (num_of_bits > 0) {
5179
103
            ColumnString::Chars off_sep_combo;
5180
103
            off_sep_combo.reserve(separator.size + off.size);
5181
103
            off_sep_combo.insert(off_sep_combo.end(), off.data, off.data + off.size);
5182
103
            off_sep_combo.insert(off_sep_combo.end(), separator.data,
5183
103
                                 separator.data + separator.size);
5184
5185
2.98k
            for (size_t i = 0; i < num_of_bits; ++i) {
5186
2.88k
                data.insert(off_sep_combo.data(), off_sep_combo.data() + off_sep_combo.size());
5187
2.88k
            }
5188
103
            data.erase(data.end() - separator.size, data.end());
5189
103
        }
5190
5191
159
        res_col.insert_data(reinterpret_cast<const char*>(data.data()), data.size());
5192
159
    }
5193
};
5194
5195
// ATTN: for debug only
5196
// compute crc32 hash value as the same way in `VOlapTablePartitionParam::find_tablets()`
5197
class FunctionCrc32Internal : public IFunction {
5198
public:
5199
    static constexpr auto name = "crc32_internal";
5200
47.8k
    static FunctionPtr create() { return std::make_shared<FunctionCrc32Internal>(); }
5201
0
    String get_name() const override { return name; }
5202
0
    size_t get_number_of_arguments() const override { return 0; }
5203
47.8k
    bool is_variadic() const override { return true; }
5204
61.9k
    bool use_default_implementation_for_nulls() const override { return false; }
5205
47.8k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
5206
47.8k
        return std::make_shared<DataTypeInt64>();
5207
47.8k
    }
5208
5209
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
5210
14.0k
                        uint32_t result, size_t input_rows_count) const override {
5211
14.0k
        DCHECK_GE(arguments.size(), 1);
5212
5213
14.0k
        auto argument_size = arguments.size();
5214
14.0k
        std::vector<ColumnPtr> argument_columns(argument_size);
5215
14.0k
        std::vector<PrimitiveType> argument_primitive_types(argument_size);
5216
5217
28.4k
        for (size_t i = 0; i < argument_size; ++i) {
5218
14.3k
            argument_columns[i] =
5219
14.3k
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
5220
14.3k
            argument_primitive_types[i] =
5221
14.3k
                    block.get_by_position(arguments[i]).type->get_primitive_type();
5222
14.3k
        }
5223
5224
14.0k
        auto res_col = ColumnInt64::create();
5225
14.0k
        auto& res_data = res_col->get_data();
5226
14.0k
        res_data.resize_fill(input_rows_count, 0);
5227
5228
15.0M
        for (size_t i = 0; i < input_rows_count; ++i) {
5229
15.0M
            uint32_t hash_val = 0;
5230
30.1M
            for (size_t j = 0; j < argument_size; ++j) {
5231
15.0M
                const auto& column = argument_columns[j];
5232
15.0M
                auto primitive_type = argument_primitive_types[j];
5233
15.0M
                auto val = column->get_data_at(i);
5234
15.0M
                if (val.data != nullptr) {
5235
15.0M
                    hash_val = RawValue::zlib_crc32(val.data, val.size, primitive_type, hash_val);
5236
15.0M
                } else {
5237
14.4k
                    hash_val = HashUtil::zlib_crc_hash_null(hash_val);
5238
14.4k
                }
5239
15.0M
            }
5240
15.0M
            res_data[i] = hash_val;
5241
15.0M
        }
5242
5243
14.0k
        block.replace_by_position(result, std::move(res_col));
5244
14.0k
        return Status::OK();
5245
14.0k
    }
5246
};
5247
5248
class FunctionUnicodeNormalize : public IFunction {
5249
public:
5250
    static constexpr auto name = "unicode_normalize";
5251
5252
24
    static FunctionPtr create() { return std::make_shared<FunctionUnicodeNormalize>(); }
5253
5254
5
    String get_name() const override { return name; }
5255
5256
15
    size_t get_number_of_arguments() const override { return 2; }
5257
5258
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
5259
15
        if (arguments.size() != 2 || !is_string_type(arguments[0]->get_primitive_type()) ||
5260
15
            !is_string_type(arguments[1]->get_primitive_type())) {
5261
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
5262
0
                                   "Illegal type {} and {} of arguments of function {}",
5263
0
                                   arguments[0]->get_name(), arguments[1]->get_name(), get_name());
5264
0
        }
5265
15
        return arguments[0];
5266
15
    }
5267
5268
16
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
5269
5270
32
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
5271
32
        if (scope == FunctionContext::THREAD_LOCAL) {
5272
17
            return Status::OK();
5273
17
        }
5274
5275
15
        if (!context->is_col_constant(1)) {
5276
2
            return Status::InvalidArgument(
5277
2
                    "The second argument 'mode' of function {} must be constant", get_name());
5278
2
        }
5279
5280
13
        auto* const_col = context->get_constant_col(1);
5281
13
        auto mode_ref = const_col->column_ptr->get_data_at(0);
5282
13
        std::string lower_mode = doris::to_lower(std::string(doris::trim(mode_ref.to_string())));
5283
5284
13
        UErrorCode status = U_ZERO_ERROR;
5285
13
        const icu::Normalizer2* normalizer = nullptr;
5286
5287
13
        if (lower_mode == "nfc") {
5288
5
            normalizer = icu::Normalizer2::getInstance(nullptr, "nfc", UNORM2_COMPOSE, status);
5289
8
        } else if (lower_mode == "nfd") {
5290
2
            normalizer = icu::Normalizer2::getNFDInstance(status);
5291
6
        } else if (lower_mode == "nfkc") {
5292
0
            normalizer = icu::Normalizer2::getInstance(nullptr, "nfkc", UNORM2_COMPOSE, status);
5293
6
        } else if (lower_mode == "nfkd") {
5294
2
            normalizer = icu::Normalizer2::getNFKDInstance(status);
5295
4
        } else if (lower_mode == "nfkc_cf") {
5296
2
            normalizer = icu::Normalizer2::getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, status);
5297
2
        } else {
5298
2
            return Status::InvalidArgument(
5299
2
                    "Invalid normalization mode '{}' for function {}. "
5300
2
                    "Supported modes: NFC, NFD, NFKC, NFKD, NFKC_CF",
5301
2
                    lower_mode, get_name());
5302
2
        }
5303
5304
11
        if (U_FAILURE(status) || normalizer == nullptr) {
5305
0
            return Status::InvalidArgument(
5306
0
                    "Failed to get normalizer instance for mode '{}' in function {}: {}",
5307
0
                    lower_mode, get_name(), u_errorName(status));
5308
0
        }
5309
5310
11
        auto state = std::make_shared<UnicodeNormalizeState>();
5311
11
        state->normalizer = normalizer;
5312
11
        context->set_function_state(scope, state);
5313
11
        return Status::OK();
5314
11
    }
5315
5316
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
5317
11
                        uint32_t result, size_t input_rows_count) const override {
5318
11
        auto* state = reinterpret_cast<UnicodeNormalizeState*>(
5319
11
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
5320
11
        if (state == nullptr || state->normalizer == nullptr) {
5321
0
            return Status::RuntimeError("unicode_normalize function state is not initialized");
5322
0
        }
5323
5324
11
        ColumnPtr col =
5325
11
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
5326
11
        const auto* col_str = check_and_get_column<ColumnString>(col.get());
5327
11
        if (col_str == nullptr) {
5328
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
5329
0
                                        block.get_by_position(arguments[0]).column->get_name(),
5330
0
                                        get_name());
5331
0
        }
5332
5333
11
        const auto& data = col_str->get_chars();
5334
11
        const auto& offsets = col_str->get_offsets();
5335
5336
11
        auto res = ColumnString::create();
5337
11
        auto& res_data = res->get_chars();
5338
11
        auto& res_offsets = res->get_offsets();
5339
5340
11
        size_t rows = offsets.size();
5341
11
        res_offsets.resize(rows);
5342
5343
11
        std::string tmp;
5344
22
        for (size_t i = 0; i < rows; ++i) {
5345
11
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
5346
11
            size_t len = offsets[i] - offsets[i - 1];
5347
5348
11
            normalize_one(state->normalizer, begin, len, tmp);
5349
11
            StringOP::push_value_string(tmp, i, res_data, res_offsets);
5350
11
        }
5351
5352
11
        block.replace_by_position(result, std::move(res));
5353
11
        return Status::OK();
5354
11
    }
5355
5356
private:
5357
    struct UnicodeNormalizeState {
5358
        const icu::Normalizer2* normalizer = nullptr;
5359
    };
5360
5361
    static void normalize_one(const icu::Normalizer2* normalizer, const char* input, size_t length,
5362
11
                              std::string& output) {
5363
11
        if (length == 0) {
5364
1
            output.clear();
5365
1
            return;
5366
1
        }
5367
5368
10
        icu::StringPiece sp(input, static_cast<int32_t>(length));
5369
10
        icu::UnicodeString src16 = icu::UnicodeString::fromUTF8(sp);
5370
5371
10
        UErrorCode status = U_ZERO_ERROR;
5372
10
        UNormalizationCheckResult quick = normalizer->quickCheck(src16, status);
5373
10
        if (U_SUCCESS(status) && quick == UNORM_YES) {
5374
4
            output.assign(input, length);
5375
4
            return;
5376
4
        }
5377
5378
6
        icu::UnicodeString result16;
5379
6
        status = U_ZERO_ERROR;
5380
6
        normalizer->normalize(src16, result16, status);
5381
6
        if (U_FAILURE(status)) {
5382
0
            output.assign(input, length);
5383
0
            return;
5384
0
        }
5385
5386
6
        output.clear();
5387
6
        result16.toUTF8String(output);
5388
6
    }
5389
};
5390
5391
#include "common/compile_check_avoid_end.h"
5392
} // namespace doris