Coverage Report

Created: 2026-03-15 18:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.h
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#pragma once
19
20
#include <glog/logging.h>
21
#include <sys/types.h>
22
23
#include <algorithm>
24
#include <array>
25
#include <boost/iterator/iterator_facade.hpp>
26
#include <boost/locale.hpp>
27
#include <climits>
28
#include <cmath>
29
#include <cstddef>
30
#include <cstdlib>
31
#include <cstring>
32
#include <iomanip>
33
#include <memory>
34
#include <ostream>
35
#include <random>
36
#include <sstream>
37
#include <tuple>
38
#include <type_traits>
39
#include <unordered_map>
40
#include <utility>
41
#include <variant>
42
#include <vector>
43
44
#include "common/compiler_util.h" // IWYU pragma: keep
45
#include "common/exception.h"
46
#include "common/status.h"
47
#include "core/block/block.h"
48
#include "core/block/column_numbers.h"
49
#include "core/block/column_with_type_and_name.h"
50
#include "core/column/column.h"
51
#include "core/column/column_const.h"
52
#include "core/column/column_varbinary.h"
53
#include "core/column/column_vector.h"
54
#include "core/data_type/data_type.h"
55
#include "core/data_type/define_primitive_type.h"
56
#include "core/data_type/primitive_type.h"
57
#include "core/memcmp_small.h"
58
#include "core/memcpy_small.h"
59
#include "core/pod_array.h"
60
#include "core/pod_array_fwd.h"
61
#include "core/types.h"
62
#include "core/value/decimalv2_value.h"
63
#include "exec/common/hash_table/phmap_fwd_decl.h"
64
#include "exec/common/int_exp.h"
65
#include "exec/common/template_helpers.hpp"
66
#include "exprs/aggregate/aggregate_function.h"
67
#include "exprs/function/function_needs_to_handle_null.h"
68
#include "util/raw_value.h"
69
#include "util/sha.h"
70
#include "util/string_search.hpp"
71
#include "util/string_util.h"
72
#include "util/utf8_check.h"
73
74
#ifndef USE_LIBCPP
75
#include <memory_resource>
76
#define PMR std::pmr
77
#else
78
#include <boost/container/pmr/monotonic_buffer_resource.hpp>
79
#include <boost/container/pmr/vector.hpp>
80
#define PMR boost::container::pmr
81
#endif
82
83
#include <fmt/format.h>
84
#include <unicode/normalizer2.h>
85
#include <unicode/stringpiece.h>
86
#include <unicode/unistr.h>
87
88
#include <cstdint>
89
#include <string>
90
#include <string_view>
91
92
#include "core/assert_cast.h"
93
#include "core/column/column_array.h"
94
#include "core/column/column_decimal.h"
95
#include "core/column/column_nullable.h"
96
#include "core/column/column_string.h"
97
#include "core/data_type/data_type_array.h"
98
#include "core/data_type/data_type_decimal.h"
99
#include "core/data_type/data_type_nullable.h"
100
#include "core/data_type/data_type_number.h"
101
#include "core/data_type/data_type_string.h"
102
#include "core/string_ref.h"
103
#include "exec/common/pinyin.h"
104
#include "exec/common/stringop_substring.h"
105
#include "exec/common/util.hpp"
106
#include "exprs/function/function.h"
107
#include "exprs/function/function_helpers.h"
108
#include "exprs/function_context.h"
109
#include "exprs/math_functions.h"
110
#include "pugixml.hpp"
111
#include "util/md5.h"
112
#include "util/simd/vstring_function.h"
113
#include "util/sm3.h"
114
#include "util/url_coding.h"
115
#include "util/url_parser.h"
116
117
namespace doris {
118
#include "common/compile_check_avoid_begin.h"
119
class FunctionStrcmp : public IFunction {
120
public:
121
    static constexpr auto name = "strcmp";
122
123
8
    static FunctionPtr create() { return std::make_shared<FunctionStrcmp>(); }
124
125
1
    String get_name() const override { return name; }
126
127
0
    size_t get_number_of_arguments() const override { return 2; }
128
129
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
130
0
        return std::make_shared<DataTypeInt8>();
131
0
    }
132
133
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
134
0
                        uint32_t result, size_t input_rows_count) const override {
135
0
        const auto& [arg0_column, arg0_const] =
136
0
                unpack_if_const(block.get_by_position(arguments[0]).column);
137
0
        const auto& [arg1_column, arg1_const] =
138
0
                unpack_if_const(block.get_by_position(arguments[1]).column);
139
140
0
        auto result_column = ColumnInt8::create(input_rows_count);
141
142
0
        if (auto arg0 = check_and_get_column<ColumnString>(arg0_column.get())) {
143
0
            if (auto arg1 = check_and_get_column<ColumnString>(arg1_column.get())) {
144
0
                if (arg0_const) {
145
0
                    scalar_vector(arg0->get_data_at(0), *arg1, *result_column);
146
0
                } else if (arg1_const) {
147
0
                    vector_scalar(*arg0, arg1->get_data_at(0), *result_column);
148
0
                } else {
149
0
                    vector_vector(*arg0, *arg1, *result_column);
150
0
                }
151
0
            }
152
0
        }
153
154
0
        block.replace_by_position(result, std::move(result_column));
155
0
        return Status::OK();
156
0
    }
157
158
private:
159
0
    static void scalar_vector(const StringRef str, const ColumnString& vec1, ColumnInt8& res) {
160
0
        size_t size = vec1.size();
161
0
        for (size_t i = 0; i < size; ++i) {
162
0
            res.get_data()[i] = str.compare(vec1.get_data_at(i));
163
0
        }
164
0
    }
165
166
0
    static void vector_scalar(const ColumnString& vec0, const StringRef str, ColumnInt8& res) {
167
0
        size_t size = vec0.size();
168
0
        for (size_t i = 0; i < size; ++i) {
169
0
            res.get_data()[i] = vec0.get_data_at(i).compare(str);
170
0
        }
171
0
    }
172
173
0
    static void vector_vector(const ColumnString& vec0, const ColumnString& vec1, ColumnInt8& res) {
174
0
        size_t size = vec0.size();
175
0
        for (size_t i = 0; i < size; ++i) {
176
0
            res.get_data()[i] = vec0.get_data_at(i).compare(vec1.get_data_at(i));
177
0
        }
178
0
    }
179
};
180
181
class FunctionAutoPartitionName : public IFunction {
182
public:
183
    static constexpr auto name = "auto_partition_name";
184
8
    static FunctionPtr create() { return std::make_shared<FunctionAutoPartitionName>(); }
185
0
    String get_name() const override { return name; }
186
0
    size_t get_number_of_arguments() const override { return 0; }
187
1
    bool is_variadic() const override { return true; }
188
0
    bool use_default_implementation_for_nulls() const override { return false; }
189
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
190
0
        return std::make_shared<DataTypeString>();
191
0
    }
192
193
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
194
0
                        uint32_t result, size_t input_rows_count) const override {
195
0
        size_t argument_size = arguments.size();
196
0
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
197
0
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
198
0
        std::vector<const ColumnString::Chars*> chars_list(argument_size);
199
0
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
200
0
        std::vector<bool> is_const_args(argument_size);
201
0
        std::vector<const ColumnUInt8::Container*> null_list(argument_size);
202
0
        std::vector<ColumnPtr> argument_null_columns(argument_size);
203
204
0
        std::vector<ColumnPtr> argument_columns(argument_size);
205
0
        for (int i = 0; i < argument_size; ++i) {
206
0
            argument_columns[i] =
207
0
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
208
0
            if (const auto* nullable =
209
0
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
210
0
                null_list[i] = &nullable->get_null_map_data();
211
0
                argument_null_columns[i] = nullable->get_null_map_column_ptr();
212
0
                argument_columns[i] = nullable->get_nested_column_ptr();
213
0
            } else {
214
0
                null_list[i] = &const_null_map->get_data();
215
0
            }
216
217
0
            const auto& [col, is_const] =
218
0
                    unpack_if_const(block.get_by_position(arguments[i]).column);
219
220
0
            const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
221
0
            chars_list[i] = &col_str->get_chars();
222
0
            offsets_list[i] = &col_str->get_offsets();
223
0
            is_const_args[i] = is_const;
224
0
        }
225
226
0
        auto res = ColumnString::create();
227
0
        auto& res_data = res->get_chars();
228
0
        auto& res_offset = res->get_offsets();
229
0
        res_offset.resize(input_rows_count);
230
231
0
        const char* partition_type = chars_list[0]->raw_data();
232
        // partition type is list|range
233
0
        if (std::strncmp(partition_type, "list", 4) == 0) {
234
0
            return _auto_partition_type_of_list(chars_list, offsets_list, is_const_args, null_list,
235
0
                                                res_data, res_offset, input_rows_count,
236
0
                                                argument_size, block, result, res);
237
0
        } else {
238
0
            return _auto_partition_type_of_range(chars_list, offsets_list, is_const_args, res_data,
239
0
                                                 res_offset, input_rows_count, argument_size, block,
240
0
                                                 result, res);
241
0
        }
242
0
        return Status::OK();
243
0
    }
244
245
private:
246
0
    std::u16string _string_to_u16string(const std::string& str) const {
247
0
        return boost::locale::conv::utf_to_utf<char16_t>(str);
248
0
    }
249
250
0
    std::string _string_to_unicode(const std::u16string& s) const {
251
0
        std::string res_s;
252
0
        res_s.reserve(s.size());
253
0
        if (s.length() > 0 && s[0] == '-') {
254
0
            res_s += '_';
255
0
        }
256
0
        for (int i = 0; i < s.length(); i++) {
257
0
            char16_t ch = s[i];
258
0
            if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')) {
259
0
                res_s += ch;
260
0
            } else {
261
0
                int unicodeValue = _get_code_point_at(s, i);
262
0
                res_s += fmt::format("{:02x}", static_cast<uint32_t>(unicodeValue));
263
0
            }
264
0
        }
265
0
        return res_s;
266
0
    }
267
268
0
    int _get_code_point_at(const std::u16string& str, std::size_t index) const {
269
0
        char16_t first = str[index];
270
        // [0xD800,0xDBFF] is the scope of the first code unit
271
0
        if ((first >= 0xD800 && first <= 0xDBFF) && (index + 1 < str.size())) {
272
0
            char16_t second = str[index + 1];
273
            // [0xDC00,0xDFFF] is the scope of the second code unit
274
0
            if (second >= 0xDC00 && second <= 0xDFFF) {
275
0
                return ((first - 0xD800) << 10) + (second - 0xDC00) + 0x10000;
276
0
            }
277
0
        }
278
279
0
        return first;
280
0
    }
281
    Status _auto_partition_type_of_list(std::vector<const ColumnString::Chars*>& chars_list,
282
                                        std::vector<const ColumnString::Offsets*>& offsets_list,
283
                                        std::vector<bool>& is_const_args,
284
                                        const std::vector<const ColumnUInt8::Container*>& null_list,
285
                                        auto& res_data, auto& res_offset, size_t input_rows_count,
286
                                        size_t argument_size, Block& block, uint32_t result,
287
0
                                        auto& res) const {
288
0
        int curr_len = 0;
289
0
        for (int row = 0; row < input_rows_count; row++) {
290
0
            std::string res_p;
291
0
            res_p.reserve(argument_size * 5);
292
0
            res_p += 'p';
293
0
            for (int col = 1; col < argument_size; col++) {
294
0
                const auto& current_offsets = *offsets_list[col];
295
0
                const auto& current_chars = *chars_list[col];
296
0
                const auto& current_nullmap = *null_list[col];
297
298
0
                if (current_nullmap[row]) {
299
0
                    res_p += 'X';
300
0
                } else {
301
0
                    auto idx = index_check_const(row, is_const_args[col]);
302
303
0
                    int size = current_offsets[idx] - current_offsets[idx - 1];
304
0
                    const char* raw_chars =
305
0
                            reinterpret_cast<const char*>(&current_chars[current_offsets[idx - 1]]);
306
                    // convert string to u16string in order to convert to unicode strings
307
0
                    const std::string raw_str(raw_chars, size);
308
0
                    auto u16string = _string_to_u16string(raw_str);
309
0
                    res_p += _string_to_unicode(u16string) + std::to_string(u16string.size());
310
0
                }
311
0
            }
312
313
            // check the name of length
314
0
            int len = res_p.size();
315
0
            if (len > 50) {
316
0
                res_p = std::format("{}_{:08x}", res_p.substr(0, 50), to_hash_code(res_p));
317
0
                len = res_p.size();
318
0
            }
319
0
            curr_len += len;
320
0
            res_data.resize(curr_len);
321
0
            memcpy(&res_data[res_offset[row - 1]], res_p.c_str(), len);
322
0
            res_offset[row] = res_offset[row - 1] + len;
323
0
        }
324
0
        block.get_by_position(result).column = std::move(res);
325
0
        return Status::OK();
326
0
    }
327
328
    size_t _copy_date_str_of_len_to_res_data(auto& res_data, auto& res_offset,
329
                                             std::vector<std::string>& date_str, size_t row,
330
0
                                             size_t len) const {
331
0
        size_t curr_len = 1;
332
0
        for (int j = 0; j < len; j++) {
333
0
            memcpy(&res_data[res_offset[row - 1]] + curr_len, date_str[j].c_str(),
334
0
                   date_str[j].size());
335
0
            curr_len += date_str[j].size();
336
0
        }
337
0
        return curr_len;
338
0
    }
339
340
    Status _auto_partition_type_of_range(std::vector<const ColumnString::Chars*>& chars_list,
341
                                         std::vector<const ColumnString::Offsets*>& offsets_list,
342
                                         std::vector<bool>& is_const_args, auto& res_data,
343
                                         auto& res_offset, size_t input_rows_count,
344
                                         size_t argument_size, Block& block, uint32_t result,
345
0
                                         auto& res) const {
346
0
        const char* range_type = chars_list[1]->raw_data();
347
348
0
        res_data.resize(15 * input_rows_count);
349
0
        for (int i = 0; i < input_rows_count; i++) {
350
0
            const auto& current_offsets = *offsets_list[2];
351
0
            const auto& current_chars = *chars_list[2];
352
353
0
            auto idx = index_check_const(i, is_const_args[2]);
354
0
            int size = current_offsets[idx] - current_offsets[idx - 1];
355
0
            const char* tmp =
356
0
                    reinterpret_cast<const char*>(&current_chars[current_offsets[idx - 1]]);
357
0
            std::string to_split_s(tmp, size);
358
359
            // check the str if it is date|datetime
360
0
            RE2 date_regex(R"(^\d{4}-\d{2}-\d{2}( \d{2}:\d{2}:\d{2})?$)");
361
0
            if (!RE2::FullMatch(to_split_s, date_regex)) {
362
0
                return Status::InvalidArgument("The range partition only support DATE|DATETIME");
363
0
            }
364
365
            // split date_str from (yyyy-mm-dd hh:mm:ss) to ([yyyy, mm, dd, hh, mm, ss])
366
0
            std::vector<std::string> date_str(6);
367
0
            date_str[0] = to_split_s.substr(0, 4);
368
0
            for (int ni = 5, j = 1; ni <= size; ni += 3, j++) {
369
0
                date_str[j] = to_split_s.substr(ni, 2);
370
0
            }
371
0
            int curr_len = 0;
372
373
0
            res_data[res_offset[i - 1]] = 'p';
374
            // raw => 2022-12-12 11:30:20
375
            // year => 2022 01 01 00 00 00
376
            // month => 2022 12 01 00 00 00
377
            // day => 2022 12 12 00 00 00
378
            // hour => 2022 12 12 11 00 00
379
            // minute => 2022 12  11 30 00
380
            // second => 2022 12 12 12 30 20
381
382
0
            if (!strncmp(range_type, "year", 4)) {
383
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 1);
384
0
                memcpy(&res_data[res_offset[i - 1]] + curr_len, "0101", 4);
385
0
                curr_len += 4;
386
0
            } else if (!strncmp(range_type, "month", 5)) {
387
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 2);
388
0
                memcpy(&res_data[res_offset[i - 1]] + curr_len, "01", 2);
389
0
                curr_len += 2;
390
0
            } else if (!strncmp(range_type, "day", 3)) {
391
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 3);
392
0
            } else if (!strncmp(range_type, "hour", 4)) {
393
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 4);
394
0
            } else if (!strncmp(range_type, "minute", 6)) {
395
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 5);
396
0
            } else if (!strncmp(range_type, "second", 6)) {
397
0
                curr_len += _copy_date_str_of_len_to_res_data(res_data, res_offset, date_str, i, 6);
398
0
            }
399
400
            // fill in zero
401
0
            int zero = 15 - curr_len;
402
0
            std::fill_n(&res_data[res_offset[i - 1]] + curr_len, zero, '0');
403
0
            curr_len += zero;
404
0
            res_offset[i] = res_offset[i - 1] + curr_len;
405
0
        }
406
0
        block.get_by_position(result).column = std::move(res);
407
0
        return Status::OK();
408
0
    }
409
410
0
    int32_t to_hash_code(const std::string& str) const {
411
0
        uint64_t h = 0;
412
0
        for (uint8_t c : str) {
413
0
            h = (h * 31U + c) & 0xFFFFFFFFU;
414
0
        }
415
0
        return static_cast<int32_t>(h);
416
0
    }
417
};
418
419
template <typename Impl>
420
class FunctionSubstring : public IFunction {
421
public:
422
    static constexpr auto name = SubstringUtil::name;
423
2
    String get_name() const override { return name; }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE8get_nameB5cxx11Ev
Line
Count
Source
423
1
    String get_name() const override { return name; }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE8get_nameB5cxx11Ev
Line
Count
Source
423
1
    String get_name() const override { return name; }
424
2.27k
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
_ZN5doris17FunctionSubstringINS_11Substr3ImplEE6createEv
Line
Count
Source
424
2.18k
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
_ZN5doris17FunctionSubstringINS_11Substr2ImplEE6createEv
Line
Count
Source
424
93
    static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
425
426
2.26k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
427
2.26k
        return std::make_shared<DataTypeString>();
428
2.26k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
426
2.17k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
427
2.17k
        return std::make_shared<DataTypeString>();
428
2.17k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
426
85
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
427
85
        return std::make_shared<DataTypeString>();
428
85
    }
429
2.27k
    DataTypes get_variadic_argument_types_impl() const override {
430
2.27k
        return Impl::get_variadic_argument_types();
431
2.27k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
429
2.18k
    DataTypes get_variadic_argument_types_impl() const override {
430
2.18k
        return Impl::get_variadic_argument_types();
431
2.18k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
429
92
    DataTypes get_variadic_argument_types_impl() const override {
430
92
        return Impl::get_variadic_argument_types();
431
92
    }
432
2.26k
    size_t get_number_of_arguments() const override {
433
2.26k
        return get_variadic_argument_types_impl().size();
434
2.26k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE23get_number_of_argumentsEv
Line
Count
Source
432
2.17k
    size_t get_number_of_arguments() const override {
433
2.17k
        return get_variadic_argument_types_impl().size();
434
2.17k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE23get_number_of_argumentsEv
Line
Count
Source
432
85
    size_t get_number_of_arguments() const override {
433
85
        return get_variadic_argument_types_impl().size();
434
85
    }
435
436
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
437
1.84k
                        uint32_t result, size_t input_rows_count) const override {
438
1.84k
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
439
1.84k
    }
_ZNK5doris17FunctionSubstringINS_11Substr3ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
437
1.78k
                        uint32_t result, size_t input_rows_count) const override {
438
1.78k
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
439
1.78k
    }
_ZNK5doris17FunctionSubstringINS_11Substr2ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
437
55
                        uint32_t result, size_t input_rows_count) const override {
438
55
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
439
55
    }
440
};
441
442
struct Substr3Impl {
443
2.18k
    static DataTypes get_variadic_argument_types() {
444
2.18k
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>(),
445
2.18k
                std::make_shared<DataTypeInt32>()};
446
2.18k
    }
447
448
    static Status execute_impl(FunctionContext* context, Block& block,
449
                               const ColumnNumbers& arguments, uint32_t result,
450
1.78k
                               size_t input_rows_count) {
451
1.78k
        SubstringUtil::substring_execute(block, arguments, result, input_rows_count);
452
1.78k
        return Status::OK();
453
1.78k
    }
454
};
455
456
struct Substr2Impl {
457
92
    static DataTypes get_variadic_argument_types() {
458
92
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>()};
459
92
    }
460
461
    static Status execute_impl(FunctionContext* context, Block& block,
462
                               const ColumnNumbers& arguments, uint32_t result,
463
55
                               size_t input_rows_count) {
464
55
        auto col_len = ColumnInt32::create(input_rows_count);
465
55
        auto& strlen_data = col_len->get_data();
466
467
55
        ColumnPtr str_col;
468
55
        bool str_const;
469
55
        std::tie(str_col, str_const) = unpack_if_const(block.get_by_position(arguments[0]).column);
470
471
55
        const auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
472
473
55
        if (str_const) {
474
18
            std::fill(strlen_data.begin(), strlen_data.end(), str_offset[0] - str_offset[-1]);
475
37
        } else {
476
101
            for (int i = 0; i < input_rows_count; ++i) {
477
64
                strlen_data[i] = str_offset[i] - str_offset[i - 1];
478
64
            }
479
37
        }
480
481
        // we complete the column2(strlen) with the default value - each row's strlen.
482
55
        block.insert({std::move(col_len), std::make_shared<DataTypeInt32>(), "strlen"});
483
55
        ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1};
484
485
55
        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
486
55
        return Status::OK();
487
55
    }
488
};
489
490
template <bool Reverse>
491
class FunctionMaskPartial;
492
493
class FunctionMask : public IFunction {
494
public:
495
    static constexpr auto name = "mask";
496
    static constexpr unsigned char DEFAULT_UPPER_MASK = 'X';
497
    static constexpr unsigned char DEFAULT_LOWER_MASK = 'x';
498
    static constexpr unsigned char DEFAULT_NUMBER_MASK = 'n';
499
0
    String get_name() const override { return name; }
500
8
    static FunctionPtr create() { return std::make_shared<FunctionMask>(); }
501
502
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
503
0
        return std::make_shared<DataTypeString>();
504
0
    }
505
506
0
    size_t get_number_of_arguments() const override { return 0; }
507
508
0
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1, 2, 3}; }
509
510
1
    bool is_variadic() const override { return true; }
511
512
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
513
0
                        uint32_t result, size_t input_rows_count) const override {
514
0
        DCHECK_GE(arguments.size(), 1);
515
0
        DCHECK_LE(arguments.size(), 4);
516
517
0
        char upper = DEFAULT_UPPER_MASK, lower = DEFAULT_LOWER_MASK, number = DEFAULT_NUMBER_MASK;
518
519
0
        auto res = ColumnString::create();
520
0
        const auto& source_column =
521
0
                assert_cast<const ColumnString&>(*block.get_by_position(arguments[0]).column);
522
523
0
        if (arguments.size() > 1) {
524
0
            const auto& col = *block.get_by_position(arguments[1]).column;
525
0
            auto string_ref = col.get_data_at(0);
526
0
            if (string_ref.size > 0) {
527
0
                upper = *string_ref.data;
528
0
            }
529
0
        }
530
531
0
        if (arguments.size() > 2) {
532
0
            const auto& col = *block.get_by_position(arguments[2]).column;
533
0
            auto string_ref = col.get_data_at(0);
534
0
            if (string_ref.size > 0) {
535
0
                lower = *string_ref.data;
536
0
            }
537
0
        }
538
539
0
        if (arguments.size() > 3) {
540
0
            const auto& col = *block.get_by_position(arguments[3]).column;
541
0
            auto string_ref = col.get_data_at(0);
542
0
            if (string_ref.size > 0) {
543
0
                number = *string_ref.data;
544
0
            }
545
0
        }
546
547
0
        if (arguments.size() > 4) {
548
0
            return Status::InvalidArgument(
549
0
                    fmt::format("too many arguments for function {}", get_name()));
550
0
        }
551
552
0
        vector_mask(source_column, *res, upper, lower, number);
553
554
0
        block.get_by_position(result).column = std::move(res);
555
556
0
        return Status::OK();
557
0
    }
558
    friend class FunctionMaskPartial<true>;
559
    friend class FunctionMaskPartial<false>;
560
561
private:
562
    static void vector_mask(const ColumnString& source, ColumnString& result, const char upper,
563
0
                            const char lower, const char number) {
564
0
        result.get_chars().resize(source.get_chars().size());
565
0
        result.get_offsets().resize(source.get_offsets().size());
566
0
        memcpy_small_allow_read_write_overflow15(
567
0
                result.get_offsets().data(), source.get_offsets().data(),
568
0
                source.get_offsets().size() * sizeof(ColumnString::Offset));
569
570
0
        const unsigned char* src = source.get_chars().data();
571
0
        const size_t size = source.get_chars().size();
572
0
        unsigned char* res = result.get_chars().data();
573
0
        mask(src, size, upper, lower, number, res);
574
0
    }
575
576
    static void mask(const unsigned char* __restrict src, const size_t size,
577
                     const unsigned char upper, const unsigned char lower,
578
0
                     const unsigned char number, unsigned char* __restrict res) {
579
0
        for (size_t i = 0; i != size; ++i) {
580
0
            auto c = src[i];
581
0
            if (c >= 'A' && c <= 'Z') {
582
0
                res[i] = upper;
583
0
            } else if (c >= 'a' && c <= 'z') {
584
0
                res[i] = lower;
585
0
            } else if (c >= '0' && c <= '9') {
586
0
                res[i] = number;
587
0
            } else {
588
0
                res[i] = c;
589
0
            }
590
0
        }
591
0
    }
592
};
593
594
template <bool Reverse>
595
class FunctionMaskPartial : public IFunction {
596
public:
597
    static constexpr auto name = Reverse ? "mask_last_n" : "mask_first_n";
598
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE8get_nameB5cxx11Ev
599
16
    static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); }
_ZN5doris19FunctionMaskPartialILb1EE6createEv
Line
Count
Source
599
8
    static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); }
_ZN5doris19FunctionMaskPartialILb0EE6createEv
Line
Count
Source
599
8
    static FunctionPtr create() { return std::make_shared<FunctionMaskPartial>(); }
600
601
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
602
0
        return std::make_shared<DataTypeString>();
603
0
    }
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS6_EE
604
605
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE23get_number_of_argumentsEv
606
607
2
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionMaskPartialILb1EE11is_variadicEv
Line
Count
Source
607
1
    bool is_variadic() const override { return true; }
_ZNK5doris19FunctionMaskPartialILb0EE11is_variadicEv
Line
Count
Source
607
1
    bool is_variadic() const override { return true; }
608
609
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
610
0
                        uint32_t result, size_t input_rows_count) const override {
611
0
        auto res = ColumnString::create();
612
0
        auto col = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
613
0
        const auto& source_column = assert_cast<const ColumnString&>(*col);
614
615
0
        if (arguments.size() == 1) { // no 2nd arg, just mask all
616
0
            FunctionMask::vector_mask(source_column, *res, FunctionMask::DEFAULT_UPPER_MASK,
617
0
                                      FunctionMask::DEFAULT_LOWER_MASK,
618
0
                                      FunctionMask::DEFAULT_NUMBER_MASK);
619
0
        } else {
620
0
            const auto& [col_2nd, is_const] =
621
0
                    unpack_if_const(block.get_by_position(arguments[1]).column);
622
623
0
            const auto& col_n = assert_cast<const ColumnInt32&>(*col_2nd);
624
625
0
            if (is_const) {
626
0
                RETURN_IF_ERROR(vector<true>(source_column, col_n, *res));
627
0
            } else {
628
0
                RETURN_IF_ERROR(vector<false>(source_column, col_n, *res));
629
0
            }
630
0
        }
631
632
0
        block.get_by_position(result).column = std::move(res);
633
634
0
        return Status::OK();
635
0
    }
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris19FunctionMaskPartialILb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
636
637
private:
638
    template <bool is_const>
639
0
    static Status vector(const ColumnString& src, const ColumnInt32& col_n, ColumnString& result) {
640
0
        const auto num_rows = src.size();
641
0
        const auto* chars = src.get_chars().data();
642
0
        const auto* offsets = src.get_offsets().data();
643
0
        result.get_chars().resize(src.get_chars().size());
644
0
        result.get_offsets().resize(src.get_offsets().size());
645
0
        memcpy_small_allow_read_write_overflow15(
646
0
                result.get_offsets().data(), src.get_offsets().data(),
647
0
                src.get_offsets().size() * sizeof(ColumnString::Offset));
648
0
        auto* res = result.get_chars().data();
649
650
0
        const auto& col_n_data = col_n.get_data();
651
652
0
        for (ssize_t i = 0; i != num_rows; ++i) {
653
0
            auto offset = offsets[i - 1];
654
0
            int len = offsets[i] - offset;
655
0
            const int n = col_n_data[index_check_const<is_const>(i)];
656
657
0
            if (n < 0) [[unlikely]] {
658
0
                return Status::InvalidArgument(
659
0
                        "function {} only accept non-negative input for 2nd argument but got {}",
660
0
                        name, n);
661
0
            }
662
663
0
            if constexpr (Reverse) {
664
0
                auto start = std::max(len - n, 0);
665
0
                if (start > 0) {
666
0
                    memcpy(&res[offset], &chars[offset], start);
667
0
                }
668
0
                offset += start;
669
0
            } else {
670
0
                if (n < len) {
671
0
                    memcpy(&res[offset + n], &chars[offset + n], len - n);
672
0
                }
673
0
            }
674
675
0
            len = std::min(n, len);
676
0
            FunctionMask::mask(&chars[offset], len, FunctionMask::DEFAULT_UPPER_MASK,
677
0
                               FunctionMask::DEFAULT_LOWER_MASK, FunctionMask::DEFAULT_NUMBER_MASK,
678
0
                               &res[offset]);
679
0
        }
680
681
0
        return Status::OK();
682
0
    }
Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb1EE6vectorILb1EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb1EE6vectorILb0EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb0EE6vectorILb1EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
Unexecuted instantiation: _ZN5doris19FunctionMaskPartialILb0EE6vectorILb0EEENS_6StatusERKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS5_
683
};
684
685
class FunctionLeft : public IFunction {
686
public:
687
    static constexpr auto name = "left";
688
174
    static FunctionPtr create() { return std::make_shared<FunctionLeft>(); }
689
1
    String get_name() const override { return name; }
690
166
    size_t get_number_of_arguments() const override { return 2; }
691
166
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
692
166
        return std::make_shared<DataTypeString>();
693
166
    }
694
695
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
696
133
                        uint32_t result, size_t input_rows_count) const override {
697
133
        DCHECK_EQ(arguments.size(), 2);
698
133
        auto res = ColumnString::create();
699
133
        bool col_const[2];
700
133
        ColumnPtr argument_columns[2];
701
399
        for (int i = 0; i < 2; ++i) {
702
266
            std::tie(argument_columns[i], col_const[i]) =
703
266
                    unpack_if_const(block.get_by_position(arguments[i]).column);
704
266
        }
705
706
133
        const auto& str_col = assert_cast<const ColumnString&>(*argument_columns[0]);
707
133
        const auto& len_col = assert_cast<const ColumnInt32&>(*argument_columns[1]);
708
133
        const auto is_ascii = str_col.is_ascii();
709
710
133
        std::visit(
711
133
                [&](auto is_ascii, auto str_const, auto len_const) {
712
133
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
133
                                                             input_rows_count);
714
133
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
711
9
                [&](auto is_ascii, auto str_const, auto len_const) {
712
9
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
9
                                                             input_rows_count);
714
9
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
711
8
                [&](auto is_ascii, auto str_const, auto len_const) {
712
8
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
8
                                                             input_rows_count);
714
8
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
711
8
                [&](auto is_ascii, auto str_const, auto len_const) {
712
8
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
8
                                                             input_rows_count);
714
8
                },
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
711
36
                [&](auto is_ascii, auto str_const, auto len_const) {
712
36
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
36
                                                             input_rows_count);
714
36
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
711
36
                [&](auto is_ascii, auto str_const, auto len_const) {
712
36
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
36
                                                             input_rows_count);
714
36
                },
_ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
711
36
                [&](auto is_ascii, auto str_const, auto len_const) {
712
36
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
713
36
                                                             input_rows_count);
714
36
                },
Unexecuted instantiation: _ZZNK5doris12FunctionLeft12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
715
133
                make_bool_variant(is_ascii), make_bool_variant(col_const[0]),
716
133
                make_bool_variant(col_const[1]));
717
718
133
        block.get_by_position(result).column = std::move(res);
719
133
        return Status::OK();
720
133
    }
721
722
    template <bool is_ascii, bool str_const, bool len_const>
723
    static void _execute(const ColumnString& str_col, const ColumnInt32& len_col, ColumnString& res,
724
133
                         size_t size) {
725
133
        auto& res_chars = res.get_chars();
726
133
        auto& res_offsets = res.get_offsets();
727
133
        res_offsets.resize(size);
728
133
        const auto& len_data = len_col.get_data();
729
730
133
        if constexpr (str_const) {
731
44
            res_chars.reserve(size * (str_col.get_chars().size()));
732
89
        } else {
733
89
            res_chars.reserve(str_col.get_chars().size());
734
89
        }
735
736
320
        for (int i = 0; i < size; ++i) {
737
187
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
187
            int len = len_data[index_check_const<len_const>(i)];
739
187
            if (len <= 0 || str.empty()) {
740
55
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
55
                continue;
742
55
            }
743
744
132
            const char* begin = str.begin();
745
132
            const char* p = begin;
746
747
132
            if constexpr (is_ascii) {
748
78
                p = begin + std::min(len, static_cast<int>(str.size));
749
78
            } else {
750
54
                const char* end = str.end();
751
396
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
342
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
342
                }
754
54
            }
755
756
132
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
132
                                                                    res_offsets);
758
132
        }
759
133
    }
_ZN5doris12FunctionLeft8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
9
                         size_t size) {
725
9
        auto& res_chars = res.get_chars();
726
9
        auto& res_offsets = res.get_offsets();
727
9
        res_offsets.resize(size);
728
9
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
9
        } else {
733
9
            res_chars.reserve(str_col.get_chars().size());
734
9
        }
735
736
72
        for (int i = 0; i < size; ++i) {
737
63
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
63
            int len = len_data[index_check_const<len_const>(i)];
739
63
            if (len <= 0 || str.empty()) {
740
23
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
23
                continue;
742
23
            }
743
744
40
            const char* begin = str.begin();
745
40
            const char* p = begin;
746
747
            if constexpr (is_ascii) {
748
                p = begin + std::min(len, static_cast<int>(str.size));
749
40
            } else {
750
40
                const char* end = str.end();
751
314
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
274
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
274
                }
754
40
            }
755
756
40
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
40
                                                                    res_offsets);
758
40
        }
759
9
    }
_ZN5doris12FunctionLeft8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
8
                         size_t size) {
725
8
        auto& res_chars = res.get_chars();
726
8
        auto& res_offsets = res.get_offsets();
727
8
        res_offsets.resize(size);
728
8
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
8
        } else {
733
8
            res_chars.reserve(str_col.get_chars().size());
734
8
        }
735
736
16
        for (int i = 0; i < size; ++i) {
737
8
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
8
            int len = len_data[index_check_const<len_const>(i)];
739
8
            if (len <= 0 || str.empty()) {
740
1
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
1
                continue;
742
1
            }
743
744
7
            const char* begin = str.begin();
745
7
            const char* p = begin;
746
747
            if constexpr (is_ascii) {
748
                p = begin + std::min(len, static_cast<int>(str.size));
749
7
            } else {
750
7
                const char* end = str.end();
751
41
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
34
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
34
                }
754
7
            }
755
756
7
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
7
                                                                    res_offsets);
758
7
        }
759
8
    }
_ZN5doris12FunctionLeft8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
8
                         size_t size) {
725
8
        auto& res_chars = res.get_chars();
726
8
        auto& res_offsets = res.get_offsets();
727
8
        res_offsets.resize(size);
728
8
        const auto& len_data = len_col.get_data();
729
730
8
        if constexpr (str_const) {
731
8
            res_chars.reserve(size * (str_col.get_chars().size()));
732
        } else {
733
            res_chars.reserve(str_col.get_chars().size());
734
        }
735
736
16
        for (int i = 0; i < size; ++i) {
737
8
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
8
            int len = len_data[index_check_const<len_const>(i)];
739
8
            if (len <= 0 || str.empty()) {
740
1
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
1
                continue;
742
1
            }
743
744
7
            const char* begin = str.begin();
745
7
            const char* p = begin;
746
747
            if constexpr (is_ascii) {
748
                p = begin + std::min(len, static_cast<int>(str.size));
749
7
            } else {
750
7
                const char* end = str.end();
751
41
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
34
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
34
                }
754
7
            }
755
756
7
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
7
                                                                    res_offsets);
758
7
        }
759
8
    }
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
_ZN5doris12FunctionLeft8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
36
                         size_t size) {
725
36
        auto& res_chars = res.get_chars();
726
36
        auto& res_offsets = res.get_offsets();
727
36
        res_offsets.resize(size);
728
36
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
36
        } else {
733
36
            res_chars.reserve(str_col.get_chars().size());
734
36
        }
735
736
72
        for (int i = 0; i < size; ++i) {
737
36
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
36
            int len = len_data[index_check_const<len_const>(i)];
739
36
            if (len <= 0 || str.empty()) {
740
10
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
10
                continue;
742
10
            }
743
744
26
            const char* begin = str.begin();
745
26
            const char* p = begin;
746
747
26
            if constexpr (is_ascii) {
748
26
                p = begin + std::min(len, static_cast<int>(str.size));
749
            } else {
750
                const char* end = str.end();
751
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
                }
754
            }
755
756
26
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
26
                                                                    res_offsets);
758
26
        }
759
36
    }
_ZN5doris12FunctionLeft8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
36
                         size_t size) {
725
36
        auto& res_chars = res.get_chars();
726
36
        auto& res_offsets = res.get_offsets();
727
36
        res_offsets.resize(size);
728
36
        const auto& len_data = len_col.get_data();
729
730
        if constexpr (str_const) {
731
            res_chars.reserve(size * (str_col.get_chars().size()));
732
36
        } else {
733
36
            res_chars.reserve(str_col.get_chars().size());
734
36
        }
735
736
72
        for (int i = 0; i < size; ++i) {
737
36
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
36
            int len = len_data[index_check_const<len_const>(i)];
739
36
            if (len <= 0 || str.empty()) {
740
10
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
10
                continue;
742
10
            }
743
744
26
            const char* begin = str.begin();
745
26
            const char* p = begin;
746
747
26
            if constexpr (is_ascii) {
748
26
                p = begin + std::min(len, static_cast<int>(str.size));
749
            } else {
750
                const char* end = str.end();
751
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
                }
754
            }
755
756
26
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
26
                                                                    res_offsets);
758
26
        }
759
36
    }
_ZN5doris12FunctionLeft8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
Line
Count
Source
724
36
                         size_t size) {
725
36
        auto& res_chars = res.get_chars();
726
36
        auto& res_offsets = res.get_offsets();
727
36
        res_offsets.resize(size);
728
36
        const auto& len_data = len_col.get_data();
729
730
36
        if constexpr (str_const) {
731
36
            res_chars.reserve(size * (str_col.get_chars().size()));
732
        } else {
733
            res_chars.reserve(str_col.get_chars().size());
734
        }
735
736
72
        for (int i = 0; i < size; ++i) {
737
36
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
738
36
            int len = len_data[index_check_const<len_const>(i)];
739
36
            if (len <= 0 || str.empty()) {
740
10
                StringOP::push_empty_string(i, res_chars, res_offsets);
741
10
                continue;
742
10
            }
743
744
26
            const char* begin = str.begin();
745
26
            const char* p = begin;
746
747
26
            if constexpr (is_ascii) {
748
26
                p = begin + std::min(len, static_cast<int>(str.size));
749
            } else {
750
                const char* end = str.end();
751
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
752
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
753
                }
754
            }
755
756
26
            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
757
26
                                                                    res_offsets);
758
26
        }
759
36
    }
Unexecuted instantiation: _ZN5doris12FunctionLeft8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEERKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERS3_m
760
};
761
762
class FunctionRight : public IFunction {
763
public:
764
    static constexpr auto name = "right";
765
87
    static FunctionPtr create() { return std::make_shared<FunctionRight>(); }
766
1
    String get_name() const override { return name; }
767
79
    size_t get_number_of_arguments() const override { return 2; }
768
79
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
769
79
        return std::make_shared<DataTypeString>();
770
79
    }
771
772
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
773
61
                        uint32_t result, size_t input_rows_count) const override {
774
61
        auto int_type = std::make_shared<DataTypeInt32>();
775
61
        auto params1 = ColumnInt32::create(input_rows_count);
776
61
        auto params2 = ColumnInt32::create(input_rows_count);
777
61
        size_t num_columns_without_result = block.columns();
778
779
        // params1 = max(arg[1], -len(arg))
780
61
        auto& index_data = params1->get_data();
781
61
        auto& strlen_data = params2->get_data();
782
783
61
        auto str_col =
784
61
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
785
61
        const auto* str_column = assert_cast<const ColumnString*>(str_col.get());
786
61
        auto pos_col =
787
61
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
788
61
        const auto& pos_data = assert_cast<const ColumnInt32*>(pos_col.get())->get_data();
789
790
147
        for (int i = 0; i < input_rows_count; ++i) {
791
86
            auto str = str_column->get_data_at(i);
792
86
            strlen_data[i] = simd::VStringFunctions::get_char_len(str.data, str.size);
793
86
        }
794
795
147
        for (int i = 0; i < input_rows_count; ++i) {
796
86
            index_data[i] = std::max(-pos_data[i], -strlen_data[i]);
797
86
        }
798
799
61
        block.insert({std::move(params1), int_type, "index"});
800
61
        block.insert({std::move(params2), int_type, "strlen"});
801
802
61
        ColumnNumbers temp_arguments(3);
803
61
        temp_arguments[0] = arguments[0];
804
61
        temp_arguments[1] = num_columns_without_result;
805
61
        temp_arguments[2] = num_columns_without_result + 1;
806
61
        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
807
61
        return Status::OK();
808
61
    }
809
};
810
811
struct NullOrEmptyImpl {
812
0
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeUInt8>()}; }
813
814
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
815
5
                          uint32_t result, size_t input_rows_count, bool reverse) {
816
5
        auto res_map = ColumnUInt8::create(input_rows_count, 0);
817
818
5
        auto column = block.get_by_position(arguments[0]).column;
819
5
        if (auto* nullable = check_and_get_column<const ColumnNullable>(*column)) {
820
5
            column = nullable->get_nested_column_ptr();
821
5
            VectorizedUtils::update_null_map(res_map->get_data(), nullable->get_null_map_data());
822
5
        }
823
5
        auto str_col = assert_cast<const ColumnString*>(column.get());
824
5
        const auto& offsets = str_col->get_offsets();
825
826
5
        auto& res_map_data = res_map->get_data();
827
13
        for (int i = 0; i < input_rows_count; ++i) {
828
8
            int size = offsets[i] - offsets[i - 1];
829
8
            res_map_data[i] |= (size == 0);
830
8
        }
831
5
        if (reverse) {
832
0
            for (int i = 0; i < input_rows_count; ++i) {
833
0
                res_map_data[i] = !res_map_data[i];
834
0
            }
835
0
        }
836
837
5
        block.replace_by_position(result, std::move(res_map));
838
5
        return Status::OK();
839
5
    }
840
};
841
842
class FunctionNullOrEmpty : public IFunction {
843
public:
844
    static constexpr auto name = "null_or_empty";
845
13
    static FunctionPtr create() { return std::make_shared<FunctionNullOrEmpty>(); }
846
1
    String get_name() const override { return name; }
847
5
    size_t get_number_of_arguments() const override { return 1; }
848
849
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
850
5
        return std::make_shared<DataTypeUInt8>();
851
5
    }
852
853
10
    bool use_default_implementation_for_nulls() const override { return false; }
854
855
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
856
5
                        uint32_t result, size_t input_rows_count) const override {
857
5
        RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result,
858
5
                                                 input_rows_count, false));
859
5
        return Status::OK();
860
5
    }
861
};
862
863
class FunctionNotNullOrEmpty : public IFunction {
864
public:
865
    static constexpr auto name = "not_null_or_empty";
866
8
    static FunctionPtr create() { return std::make_shared<FunctionNotNullOrEmpty>(); }
867
1
    String get_name() const override { return name; }
868
0
    size_t get_number_of_arguments() const override { return 1; }
869
870
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
871
0
        return std::make_shared<DataTypeUInt8>();
872
0
    }
873
874
0
    bool use_default_implementation_for_nulls() const override { return false; }
875
876
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
877
0
                        uint32_t result, size_t input_rows_count) const override {
878
0
        RETURN_IF_ERROR(NullOrEmptyImpl::execute(context, block, arguments, result,
879
0
                                                 input_rows_count, true));
880
0
        return Status::OK();
881
0
    }
882
};
883
884
class FunctionStringConcat : public IFunction {
885
public:
886
    struct ConcatState {
887
        bool use_state = false;
888
        std::string tail;
889
    };
890
891
    static constexpr auto name = "concat";
892
561
    static FunctionPtr create() { return std::make_shared<FunctionStringConcat>(); }
893
0
    String get_name() const override { return name; }
894
0
    size_t get_number_of_arguments() const override { return 0; }
895
554
    bool is_variadic() const override { return true; }
896
897
553
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
898
553
        return std::make_shared<DataTypeString>();
899
553
    }
900
901
1.29k
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
902
1.29k
        if (scope == FunctionContext::THREAD_LOCAL) {
903
743
            return Status::OK();
904
743
        }
905
554
        std::shared_ptr<ConcatState> state = std::make_shared<ConcatState>();
906
907
554
        context->set_function_state(scope, state);
908
909
554
        state->use_state = true;
910
911
        // Optimize function calls like this:
912
        // concat(col, "123", "abc", "456") -> tail = "123abc456"
913
960
        for (size_t i = 1; i < context->get_num_args(); i++) {
914
763
            const auto* column_string = context->get_constant_col(i);
915
763
            if (column_string == nullptr) {
916
329
                state->use_state = false;
917
329
                return IFunction::open(context, scope);
918
329
            }
919
434
            auto string_vale = column_string->column_ptr->get_data_at(0);
920
434
            if (string_vale.data == nullptr) {
921
                // For concat(col, null), it is handled by default_implementation_for_nulls
922
28
                state->use_state = false;
923
28
                return IFunction::open(context, scope);
924
28
            }
925
926
406
            state->tail.append(string_vale.begin(), string_vale.size);
927
406
        }
928
929
        // The reserve is used here to allow the usage of memcpy_small_allow_read_write_overflow15 below.
930
197
        state->tail.reserve(state->tail.size() + 16);
931
932
197
        return IFunction::open(context, scope);
933
554
    }
934
935
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
936
478
                        uint32_t result, size_t input_rows_count) const override {
937
478
        DCHECK_GE(arguments.size(), 1);
938
939
478
        if (arguments.size() == 1) {
940
3
            block.get_by_position(result).column = block.get_by_position(arguments[0]).column;
941
3
            return Status::OK();
942
3
        }
943
475
        auto* concat_state = reinterpret_cast<ConcatState*>(
944
475
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
945
475
        if (!concat_state) {
946
0
            return Status::RuntimeError("funciton context for function '{}' must have ConcatState;",
947
0
                                        get_name());
948
0
        }
949
475
        if (concat_state->use_state) {
950
175
            const auto& [col, is_const] =
951
175
                    unpack_if_const(block.get_by_position(arguments[0]).column);
952
175
            const auto* col_str = assert_cast<const ColumnString*>(col.get());
953
175
            if (is_const) {
954
0
                return execute_const<true>(concat_state, block, col_str, result, input_rows_count);
955
175
            } else {
956
175
                return execute_const<false>(concat_state, block, col_str, result, input_rows_count);
957
175
            }
958
959
300
        } else {
960
300
            return execute_vecotr(block, arguments, result, input_rows_count);
961
300
        }
962
475
    }
963
964
    Status execute_vecotr(Block& block, const ColumnNumbers& arguments, uint32_t result,
965
300
                          size_t input_rows_count) const {
966
300
        int argument_size = arguments.size();
967
300
        std::vector<ColumnPtr> argument_columns(argument_size);
968
969
300
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
970
300
        std::vector<const ColumnString::Chars*> chars_list(argument_size);
971
300
        std::vector<bool> is_const_args(argument_size);
972
973
1.11k
        for (int i = 0; i < argument_size; ++i) {
974
818
            const auto& [col, is_const] =
975
818
                    unpack_if_const(block.get_by_position(arguments[i]).column);
976
977
818
            const auto* col_str = assert_cast<const ColumnString*>(col.get());
978
818
            offsets_list[i] = &col_str->get_offsets();
979
818
            chars_list[i] = &col_str->get_chars();
980
818
            is_const_args[i] = is_const;
981
818
        }
982
983
300
        auto res = ColumnString::create();
984
300
        auto& res_data = res->get_chars();
985
300
        auto& res_offset = res->get_offsets();
986
987
300
        res_offset.resize(input_rows_count);
988
300
        size_t res_reserve_size = 0;
989
1.11k
        for (size_t i = 0; i < argument_size; ++i) {
990
818
            if (is_const_args[i]) {
991
309
                res_reserve_size += (*offsets_list[i])[0] * input_rows_count;
992
509
            } else {
993
509
                res_reserve_size += (*offsets_list[i])[input_rows_count - 1];
994
509
            }
995
818
        }
996
997
300
        ColumnString::check_chars_length(res_reserve_size, 0);
998
999
300
        res_data.resize(res_reserve_size);
1000
1001
300
        auto* data = res_data.data();
1002
300
        size_t dst_offset = 0;
1003
1004
2.14k
        for (size_t i = 0; i < input_rows_count; ++i) {
1005
7.17k
            for (size_t j = 0; j < argument_size; ++j) {
1006
5.33k
                const auto& current_offsets = *offsets_list[j];
1007
5.33k
                const auto& current_chars = *chars_list[j];
1008
5.33k
                auto idx = index_check_const(i, is_const_args[j]);
1009
5.33k
                const auto size = current_offsets[idx] - current_offsets[idx - 1];
1010
5.33k
                if (size > 0) {
1011
5.24k
                    memcpy_small_allow_read_write_overflow15(
1012
5.24k
                            data + dst_offset, current_chars.data() + current_offsets[idx - 1],
1013
5.24k
                            size);
1014
5.24k
                    dst_offset += size;
1015
5.24k
                }
1016
5.33k
            }
1017
1.84k
            res_offset[i] = dst_offset;
1018
1.84k
        }
1019
1020
300
        block.get_by_position(result).column = std::move(res);
1021
300
        return Status::OK();
1022
300
    }
1023
1024
    template <bool is_const>
1025
    Status execute_const(ConcatState* concat_state, Block& block, const ColumnString* col_str,
1026
175
                         uint32_t result, size_t input_rows_count) const {
1027
        // using tail optimize
1028
1029
175
        auto res = ColumnString::create();
1030
175
        auto& res_data = res->get_chars();
1031
175
        auto& res_offset = res->get_offsets();
1032
175
        res_offset.resize(input_rows_count);
1033
1034
175
        size_t res_reserve_size = 0;
1035
175
        if constexpr (is_const) {
1036
0
            res_reserve_size = col_str->get_offsets()[0] * input_rows_count;
1037
175
        } else {
1038
175
            res_reserve_size = col_str->get_offsets()[input_rows_count - 1];
1039
175
        }
1040
175
        res_reserve_size += concat_state->tail.size() * input_rows_count;
1041
1042
175
        ColumnString::check_chars_length(res_reserve_size, 0);
1043
175
        res_data.resize(res_reserve_size);
1044
1045
175
        const auto& tail = concat_state->tail;
1046
175
        auto* data = res_data.data();
1047
175
        size_t dst_offset = 0;
1048
1049
352
        for (size_t i = 0; i < input_rows_count; ++i) {
1050
177
            const auto idx = index_check_const<is_const>(i);
1051
177
            StringRef str_val = col_str->get_data_at(idx);
1052
            // copy column
1053
177
            memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size);
1054
177
            dst_offset += str_val.size;
1055
            // copy tail
1056
177
            memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size());
1057
177
            dst_offset += tail.size();
1058
177
            res_offset[i] = dst_offset;
1059
177
        }
1060
175
        block.get_by_position(result).column = std::move(res);
1061
175
        return Status::OK();
1062
175
    }
Unexecuted instantiation: _ZNK5doris20FunctionStringConcat13execute_constILb1EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm
_ZNK5doris20FunctionStringConcat13execute_constILb0EEENS_6StatusEPNS0_11ConcatStateERNS_5BlockEPKNS_9ColumnStrIjEEjm
Line
Count
Source
1026
175
                         uint32_t result, size_t input_rows_count) const {
1027
        // using tail optimize
1028
1029
175
        auto res = ColumnString::create();
1030
175
        auto& res_data = res->get_chars();
1031
175
        auto& res_offset = res->get_offsets();
1032
175
        res_offset.resize(input_rows_count);
1033
1034
175
        size_t res_reserve_size = 0;
1035
        if constexpr (is_const) {
1036
            res_reserve_size = col_str->get_offsets()[0] * input_rows_count;
1037
175
        } else {
1038
175
            res_reserve_size = col_str->get_offsets()[input_rows_count - 1];
1039
175
        }
1040
175
        res_reserve_size += concat_state->tail.size() * input_rows_count;
1041
1042
175
        ColumnString::check_chars_length(res_reserve_size, 0);
1043
175
        res_data.resize(res_reserve_size);
1044
1045
175
        const auto& tail = concat_state->tail;
1046
175
        auto* data = res_data.data();
1047
175
        size_t dst_offset = 0;
1048
1049
352
        for (size_t i = 0; i < input_rows_count; ++i) {
1050
177
            const auto idx = index_check_const<is_const>(i);
1051
177
            StringRef str_val = col_str->get_data_at(idx);
1052
            // copy column
1053
177
            memcpy_small_allow_read_write_overflow15(data + dst_offset, str_val.data, str_val.size);
1054
177
            dst_offset += str_val.size;
1055
            // copy tail
1056
177
            memcpy_small_allow_read_write_overflow15(data + dst_offset, tail.data(), tail.size());
1057
177
            dst_offset += tail.size();
1058
177
            res_offset[i] = dst_offset;
1059
177
        }
1060
175
        block.get_by_position(result).column = std::move(res);
1061
175
        return Status::OK();
1062
175
    }
1063
};
1064
1065
class FunctionStringElt : public IFunction {
1066
public:
1067
    static constexpr auto name = "elt";
1068
366
    static FunctionPtr create() { return std::make_shared<FunctionStringElt>(); }
1069
0
    String get_name() const override { return name; }
1070
0
    size_t get_number_of_arguments() const override { return 0; }
1071
359
    bool is_variadic() const override { return true; }
1072
1073
358
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1074
358
        return make_nullable(std::make_shared<DataTypeString>());
1075
358
    }
1076
716
    bool use_default_implementation_for_nulls() const override { return false; }
1077
1078
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1079
358
                        uint32_t result, size_t input_rows_count) const override {
1080
358
        int arguent_size = arguments.size();
1081
358
        int num_children = arguent_size - 1;
1082
358
        auto res = ColumnString::create();
1083
1084
358
        if (auto const_column = check_and_get_column<ColumnConst>(
1085
358
                    *block.get_by_position(arguments[0]).column)) {
1086
153
            auto data = const_column->get_data_at(0);
1087
            // return NULL, pos is null or pos < 0 or pos > num_children
1088
153
            auto is_null = data.data == nullptr;
1089
153
            auto pos = is_null ? 0 : *(Int32*)data.data;
1090
153
            is_null = pos <= 0 || pos > num_children;
1091
1092
153
            auto null_map = ColumnUInt8::create(input_rows_count, is_null);
1093
153
            if (is_null) {
1094
135
                res->insert_many_defaults(input_rows_count);
1095
135
            } else {
1096
18
                auto& target_column = block.get_by_position(arguments[pos]).column;
1097
18
                if (auto target_const_column = check_and_get_column<ColumnConst>(*target_column)) {
1098
6
                    auto target_data = target_const_column->get_data_at(0);
1099
                    // return NULL, no target data
1100
6
                    if (target_data.data == nullptr) {
1101
0
                        null_map = ColumnUInt8::create(input_rows_count, true);
1102
0
                        res->insert_many_defaults(input_rows_count);
1103
6
                    } else {
1104
6
                        res->insert_data_repeatedly(target_data.data, target_data.size,
1105
6
                                                    input_rows_count);
1106
6
                    }
1107
12
                } else if (auto target_nullable_column =
1108
12
                                   check_and_get_column<ColumnNullable>(*target_column)) {
1109
12
                    auto& target_null_map = target_nullable_column->get_null_map_data();
1110
12
                    VectorizedUtils::update_null_map(
1111
12
                            assert_cast<ColumnUInt8&>(*null_map).get_data(), target_null_map);
1112
1113
12
                    auto& target_str_column = assert_cast<const ColumnString&>(
1114
12
                            target_nullable_column->get_nested_column());
1115
12
                    res->get_chars().assign(target_str_column.get_chars().begin(),
1116
12
                                            target_str_column.get_chars().end());
1117
12
                    res->get_offsets().assign(target_str_column.get_offsets().begin(),
1118
12
                                              target_str_column.get_offsets().end());
1119
12
                } else {
1120
0
                    auto& target_str_column = assert_cast<const ColumnString&>(*target_column);
1121
0
                    res->get_chars().assign(target_str_column.get_chars().begin(),
1122
0
                                            target_str_column.get_chars().end());
1123
0
                    res->get_offsets().assign(target_str_column.get_offsets().begin(),
1124
0
                                              target_str_column.get_offsets().end());
1125
0
                }
1126
18
            }
1127
153
            block.get_by_position(result).column =
1128
153
                    ColumnNullable::create(std::move(res), std::move(null_map));
1129
205
        } else if (auto pos_null_column = check_and_get_column<ColumnNullable>(
1130
205
                           *block.get_by_position(arguments[0]).column)) {
1131
205
            auto& pos_column =
1132
205
                    assert_cast<const ColumnInt32&>(pos_null_column->get_nested_column());
1133
205
            auto& pos_null_map = pos_null_column->get_null_map_data();
1134
205
            auto null_map = ColumnUInt8::create(input_rows_count, false);
1135
205
            auto& res_null_map = assert_cast<ColumnUInt8&>(*null_map).get_data();
1136
1137
460
            for (size_t i = 0; i < input_rows_count; ++i) {
1138
255
                auto pos = pos_column.get_element(i);
1139
255
                res_null_map[i] =
1140
255
                        pos_null_map[i] || pos <= 0 || pos > num_children ||
1141
255
                        block.get_by_position(arguments[pos]).column->get_data_at(i).data ==
1142
30
                                nullptr;
1143
255
                if (res_null_map[i]) {
1144
225
                    res->insert_default();
1145
225
                } else {
1146
30
                    auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i);
1147
30
                    res->insert_data(insert_data.data, insert_data.size);
1148
30
                }
1149
255
            }
1150
205
            block.get_by_position(result).column =
1151
205
                    ColumnNullable::create(std::move(res), std::move(null_map));
1152
205
        } else {
1153
0
            auto& pos_column =
1154
0
                    assert_cast<const ColumnInt32&>(*block.get_by_position(arguments[0]).column);
1155
0
            auto null_map = ColumnUInt8::create(input_rows_count, false);
1156
0
            auto& res_null_map = assert_cast<ColumnUInt8&>(*null_map).get_data();
1157
1158
0
            for (size_t i = 0; i < input_rows_count; ++i) {
1159
0
                auto pos = pos_column.get_element(i);
1160
0
                res_null_map[i] =
1161
0
                        pos <= 0 || pos > num_children ||
1162
0
                        block.get_by_position(arguments[pos]).column->get_data_at(i).data ==
1163
0
                                nullptr;
1164
0
                if (res_null_map[i]) {
1165
0
                    res->insert_default();
1166
0
                } else {
1167
0
                    auto insert_data = block.get_by_position(arguments[pos]).column->get_data_at(i);
1168
0
                    res->insert_data(insert_data.data, insert_data.size);
1169
0
                }
1170
0
            }
1171
0
            block.get_by_position(result).column =
1172
0
                    ColumnNullable::create(std::move(res), std::move(null_map));
1173
0
        }
1174
358
        return Status::OK();
1175
358
    }
1176
};
1177
1178
// concat_ws (string,string....) or (string, Array)
1179
// TODO: avoid using fmtlib
1180
class FunctionStringConcatWs : public IFunction {
1181
public:
1182
    using Chars = ColumnString::Chars;
1183
    using Offsets = ColumnString::Offsets;
1184
1185
    static constexpr auto name = "concat_ws";
1186
457
    static FunctionPtr create() { return std::make_shared<FunctionStringConcatWs>(); }
1187
0
    String get_name() const override { return name; }
1188
0
    size_t get_number_of_arguments() const override { return 0; }
1189
450
    bool is_variadic() const override { return true; }
1190
1191
449
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1192
449
        const IDataType* first_type = arguments[0].get();
1193
449
        if (first_type->is_nullable()) {
1194
449
            return make_nullable(std::make_shared<DataTypeString>());
1195
449
        } else {
1196
0
            return std::make_shared<DataTypeString>();
1197
0
        }
1198
449
    }
1199
898
    bool use_default_implementation_for_nulls() const override { return false; }
1200
1201
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1202
449
                        uint32_t result, size_t input_rows_count) const override {
1203
449
        DCHECK_GE(arguments.size(), 2);
1204
449
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1205
        // we create a zero column to simply implement
1206
449
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
1207
449
        auto res = ColumnString::create();
1208
449
        bool is_null_type = block.get_by_position(arguments[0]).type.get()->is_nullable();
1209
449
        size_t argument_size = arguments.size();
1210
449
        std::vector<const Offsets*> offsets_list(argument_size);
1211
449
        std::vector<const Chars*> chars_list(argument_size);
1212
449
        std::vector<const ColumnUInt8::Container*> null_list(argument_size);
1213
1214
449
        std::vector<ColumnPtr> argument_columns(argument_size);
1215
449
        std::vector<ColumnPtr> argument_null_columns(argument_size);
1216
1217
1.53k
        for (size_t i = 0; i < argument_size; ++i) {
1218
1.08k
            argument_columns[i] =
1219
1.08k
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
1220
1.08k
            if (const auto* nullable =
1221
1.08k
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
1222
                // Danger: Here must dispose the null map data first! Because
1223
                // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
1224
                // of column nullable mem of null map
1225
1.08k
                null_list[i] = &nullable->get_null_map_data();
1226
1.08k
                argument_null_columns[i] = nullable->get_null_map_column_ptr();
1227
1.08k
                argument_columns[i] = nullable->get_nested_column_ptr();
1228
1.08k
            } else {
1229
0
                null_list[i] = &const_null_map->get_data();
1230
0
            }
1231
1232
1.08k
            if (is_column<ColumnArray>(argument_columns[i].get())) {
1233
36
                continue;
1234
36
            }
1235
1236
1.05k
            const auto* col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
1237
1.05k
            offsets_list[i] = &col_str->get_offsets();
1238
1.05k
            chars_list[i] = &col_str->get_chars();
1239
1.05k
        }
1240
1241
449
        auto& res_data = res->get_chars();
1242
449
        auto& res_offset = res->get_offsets();
1243
449
        res_offset.resize(input_rows_count);
1244
1245
449
        VectorizedUtils::update_null_map(null_map->get_data(), *null_list[0]);
1246
449
        fmt::memory_buffer buffer;
1247
449
        std::vector<std::string_view> views;
1248
1249
449
        if (is_column<ColumnArray>(argument_columns[1].get())) {
1250
            // Determine if the nested type of the array is String
1251
36
            const auto& array_column = reinterpret_cast<const ColumnArray&>(*argument_columns[1]);
1252
36
            if (!array_column.get_data().is_column_string()) {
1253
0
                return Status::NotSupported(
1254
0
                        fmt::format("unsupported nested array of type {} for function {}",
1255
0
                                    is_column_nullable(array_column.get_data())
1256
0
                                            ? array_column.get_data().get_name()
1257
0
                                            : array_column.get_data().get_name(),
1258
0
                                    get_name()));
1259
0
            }
1260
            // Concat string in array
1261
36
            _execute_array(input_rows_count, array_column, buffer, views, offsets_list, chars_list,
1262
36
                           null_list, res_data, res_offset);
1263
1264
413
        } else {
1265
            // Concat string
1266
413
            _execute_string(input_rows_count, argument_size, buffer, views, offsets_list,
1267
413
                            chars_list, null_list, res_data, res_offset);
1268
413
        }
1269
449
        if (is_null_type) {
1270
449
            block.get_by_position(result).column =
1271
449
                    ColumnNullable::create(std::move(res), std::move(null_map));
1272
449
        } else {
1273
0
            block.get_by_position(result).column = std::move(res);
1274
0
        }
1275
449
        return Status::OK();
1276
449
    }
1277
1278
private:
1279
    void _execute_array(const size_t& input_rows_count, const ColumnArray& array_column,
1280
                        fmt::memory_buffer& buffer, std::vector<std::string_view>& views,
1281
                        const std::vector<const Offsets*>& offsets_list,
1282
                        const std::vector<const Chars*>& chars_list,
1283
                        const std::vector<const ColumnUInt8::Container*>& null_list,
1284
36
                        Chars& res_data, Offsets& res_offset) const {
1285
        // Get array nested column
1286
36
        const UInt8* array_nested_null_map = nullptr;
1287
36
        ColumnPtr array_nested_column = nullptr;
1288
1289
36
        if (is_column_nullable(array_column.get_data())) {
1290
36
            const auto& array_nested_null_column =
1291
36
                    reinterpret_cast<const ColumnNullable&>(array_column.get_data());
1292
            // String's null map in array
1293
36
            array_nested_null_map =
1294
36
                    array_nested_null_column.get_null_map_column().get_data().data();
1295
36
            array_nested_column = array_nested_null_column.get_nested_column_ptr();
1296
36
        } else {
1297
0
            array_nested_column = array_column.get_data_ptr();
1298
0
        }
1299
1300
36
        const auto& string_column = reinterpret_cast<const ColumnString&>(*array_nested_column);
1301
36
        const Chars& string_src_chars = string_column.get_chars();
1302
36
        const auto& src_string_offsets = string_column.get_offsets();
1303
36
        const auto& src_array_offsets = array_column.get_offsets();
1304
36
        size_t current_src_array_offset = 0;
1305
1306
        // Concat string in array
1307
76
        for (size_t i = 0; i < input_rows_count; ++i) {
1308
40
            auto& sep_offsets = *offsets_list[0];
1309
40
            auto& sep_chars = *chars_list[0];
1310
40
            auto& sep_nullmap = *null_list[0];
1311
1312
40
            if (sep_nullmap[i]) {
1313
8
                res_offset[i] = res_data.size();
1314
8
                current_src_array_offset += src_array_offsets[i] - src_array_offsets[i - 1];
1315
8
                continue;
1316
8
            }
1317
1318
32
            int sep_size = sep_offsets[i] - sep_offsets[i - 1];
1319
32
            const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]);
1320
1321
32
            std::string_view sep(sep_data, sep_size);
1322
32
            buffer.clear();
1323
32
            views.clear();
1324
1325
32
            for (auto next_src_array_offset = src_array_offsets[i];
1326
128
                 current_src_array_offset < next_src_array_offset; ++current_src_array_offset) {
1327
96
                const auto current_src_string_offset =
1328
96
                        current_src_array_offset ? src_string_offsets[current_src_array_offset - 1]
1329
96
                                                 : 0;
1330
96
                size_t bytes_to_copy =
1331
96
                        src_string_offsets[current_src_array_offset] - current_src_string_offset;
1332
96
                const char* ptr =
1333
96
                        reinterpret_cast<const char*>(&string_src_chars[current_src_string_offset]);
1334
1335
96
                if (array_nested_null_map == nullptr ||
1336
96
                    !array_nested_null_map[current_src_array_offset]) {
1337
96
                    views.emplace_back(ptr, bytes_to_copy);
1338
96
                }
1339
96
            }
1340
1341
32
            fmt::format_to(buffer, "{}", fmt::join(views, sep));
1342
1343
32
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1344
32
                                        res_offset);
1345
32
        }
1346
36
    }
1347
1348
    void _execute_string(const size_t& input_rows_count, const size_t& argument_size,
1349
                         fmt::memory_buffer& buffer, std::vector<std::string_view>& views,
1350
                         const std::vector<const Offsets*>& offsets_list,
1351
                         const std::vector<const Chars*>& chars_list,
1352
                         const std::vector<const ColumnUInt8::Container*>& null_list,
1353
413
                         Chars& res_data, Offsets& res_offset) const {
1354
        // Concat string
1355
933
        for (size_t i = 0; i < input_rows_count; ++i) {
1356
520
            auto& sep_offsets = *offsets_list[0];
1357
520
            auto& sep_chars = *chars_list[0];
1358
520
            auto& sep_nullmap = *null_list[0];
1359
520
            if (sep_nullmap[i]) {
1360
72
                res_offset[i] = res_data.size();
1361
72
                continue;
1362
72
            }
1363
1364
448
            int sep_size = sep_offsets[i] - sep_offsets[i - 1];
1365
448
            const char* sep_data = reinterpret_cast<const char*>(&sep_chars[sep_offsets[i - 1]]);
1366
1367
448
            std::string_view sep(sep_data, sep_size);
1368
448
            buffer.clear();
1369
448
            views.clear();
1370
1.04k
            for (size_t j = 1; j < argument_size; ++j) {
1371
600
                auto& current_offsets = *offsets_list[j];
1372
600
                auto& current_chars = *chars_list[j];
1373
600
                auto& current_nullmap = *null_list[j];
1374
600
                int size = current_offsets[i] - current_offsets[i - 1];
1375
600
                const char* ptr =
1376
600
                        reinterpret_cast<const char*>(&current_chars[current_offsets[i - 1]]);
1377
600
                if (!current_nullmap[i]) {
1378
548
                    views.emplace_back(ptr, size);
1379
548
                }
1380
600
            }
1381
448
            fmt::format_to(buffer, "{}", fmt::join(views, sep));
1382
448
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1383
448
                                        res_offset);
1384
448
        }
1385
413
    }
1386
};
1387
1388
class FunctionStringRepeat : public IFunction {
1389
public:
1390
    static constexpr auto name = "repeat";
1391
189
    static FunctionPtr create() { return std::make_shared<FunctionStringRepeat>(); }
1392
1
    String get_name() const override { return name; }
1393
181
    size_t get_number_of_arguments() const override { return 2; }
1394
    // should set NULL value of nested data to default,
1395
    // as iff it's not inited and invalid, the repeat result of length is so large cause overflow
1396
163
    bool need_replace_null_data_to_default() const override { return true; }
1397
1398
181
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1399
181
        return make_nullable(std::make_shared<DataTypeString>());
1400
181
    }
1401
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1402
163
                        uint32_t result, size_t input_rows_count) const override {
1403
163
        DCHECK_EQ(arguments.size(), 2);
1404
163
        auto res = ColumnString::create();
1405
163
        auto null_map = ColumnUInt8::create();
1406
1407
163
        ColumnPtr argument_ptr[2];
1408
163
        argument_ptr[0] =
1409
163
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1410
163
        argument_ptr[1] = block.get_by_position(arguments[1]).column;
1411
1412
163
        if (const auto* col1 = check_and_get_column<ColumnString>(*argument_ptr[0])) {
1413
163
            if (const auto* col2 = check_and_get_column<ColumnInt32>(*argument_ptr[1])) {
1414
109
                RETURN_IF_ERROR(vector_vector(col1->get_chars(), col1->get_offsets(),
1415
109
                                              col2->get_data(), res->get_chars(),
1416
109
                                              res->get_offsets(), null_map->get_data()));
1417
109
                block.replace_by_position(
1418
109
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1419
109
                return Status::OK();
1420
109
            } else if (const auto* col2_const =
1421
54
                               check_and_get_column<ColumnConst>(*argument_ptr[1])) {
1422
54
                DCHECK(check_and_get_column<ColumnInt32>(col2_const->get_data_column()));
1423
54
                int repeat = col2_const->get_int(0);
1424
54
                if (repeat <= 0) {
1425
18
                    null_map->get_data().resize_fill(input_rows_count, 0);
1426
18
                    res->insert_many_defaults(input_rows_count);
1427
36
                } else {
1428
36
                    vector_const(col1->get_chars(), col1->get_offsets(), repeat, res->get_chars(),
1429
36
                                 res->get_offsets(), null_map->get_data());
1430
36
                }
1431
54
                block.replace_by_position(
1432
54
                        result, ColumnNullable::create(std::move(res), std::move(null_map)));
1433
54
                return Status::OK();
1434
54
            }
1435
163
        }
1436
1437
0
        return Status::RuntimeError("repeat function get error param: {}, {}",
1438
0
                                    argument_ptr[0]->get_name(), argument_ptr[1]->get_name());
1439
163
    }
1440
1441
    Status vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1442
                         const ColumnInt32::Container& repeats, ColumnString::Chars& res_data,
1443
                         ColumnString::Offsets& res_offsets,
1444
109
                         ColumnUInt8::Container& null_map) const {
1445
109
        size_t input_row_size = offsets.size();
1446
1447
109
        fmt::memory_buffer buffer;
1448
109
        res_offsets.resize(input_row_size);
1449
109
        null_map.resize_fill(input_row_size, 0);
1450
277
        for (ssize_t i = 0; i < input_row_size; ++i) {
1451
168
            buffer.clear();
1452
168
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1453
168
            size_t size = offsets[i] - offsets[i - 1];
1454
168
            int repeat = repeats[i];
1455
168
            if (repeat <= 0) {
1456
56
                StringOP::push_empty_string(i, res_data, res_offsets);
1457
112
            } else {
1458
112
                ColumnString::check_chars_length(repeat * size + res_data.size(), 0);
1459
644
                for (int j = 0; j < repeat; ++j) {
1460
532
                    buffer.append(raw_str, raw_str + size);
1461
532
                }
1462
112
                StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i,
1463
112
                                            res_data, res_offsets);
1464
112
            }
1465
168
        }
1466
109
        return Status::OK();
1467
109
    }
1468
1469
    // TODO: 1. use pmr::vector<char> replace fmt_buffer may speed up the code
1470
    //       2. abstract the `vector_vector` and `vector_const`
1471
    //       3. rethink we should use `DEFAULT_MAX_STRING_SIZE` to bigger here
1472
    void vector_const(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1473
                      int repeat, ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets,
1474
36
                      ColumnUInt8::Container& null_map) const {
1475
36
        size_t input_row_size = offsets.size();
1476
1477
36
        fmt::memory_buffer buffer;
1478
36
        res_offsets.resize(input_row_size);
1479
36
        null_map.resize_fill(input_row_size, 0);
1480
72
        for (ssize_t i = 0; i < input_row_size; ++i) {
1481
36
            buffer.clear();
1482
36
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1483
36
            size_t size = offsets[i] - offsets[i - 1];
1484
36
            ColumnString::check_chars_length(repeat * size + res_data.size(), 0);
1485
1486
207
            for (int j = 0; j < repeat; ++j) {
1487
171
                buffer.append(raw_str, raw_str + size);
1488
171
            }
1489
36
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1490
36
                                        res_offsets);
1491
36
        }
1492
36
    }
1493
};
1494
1495
template <typename Impl>
1496
class FunctionStringPad : public IFunction {
1497
public:
1498
    static constexpr auto name = Impl::name;
1499
1.38k
    /// Creates a new shared instance of this pad function.
    static FunctionPtr create() {
        return std::make_shared<FunctionStringPad>();
    }
_ZN5doris17FunctionStringPadINS_10StringLPadEE6createEv
Line
Count
Source
1499
695
    static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }
_ZN5doris17FunctionStringPadINS_10StringRPadEE6createEv
Line
Count
Source
1499
688
    static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }
1500
2
    /// Returns the function name, which is taken from Impl::name.
    String get_name() const override {
        return name;
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE8get_nameB5cxx11Ev
Line
Count
Source
1500
1
    String get_name() const override { return name; }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE8get_nameB5cxx11Ev
Line
Count
Source
1500
1
    String get_name() const override { return name; }
1501
1.36k
    /// The pad functions take exactly three arguments; execute_impl reads them as
    /// the source string, the target length and the pad string.
    size_t get_number_of_arguments() const override {
        return 3;
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE23get_number_of_argumentsEv
Line
Count
Source
1501
687
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE23get_number_of_argumentsEv
Line
Count
Source
1501
680
    size_t get_number_of_arguments() const override { return 3; }
1502
1503
1.36k
    /// The result is always Nullable(String), independent of the argument types.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        auto string_type = std::make_shared<DataTypeString>();
        return make_nullable(string_type);
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1503
687
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1504
687
        return make_nullable(std::make_shared<DataTypeString>());
1505
687
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1503
680
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1504
680
        return make_nullable(std::make_shared<DataTypeString>());
1505
680
    }
1506
1507
    /// Evaluates lpad/rpad for one block.
    ///
    /// Each of the three arguments (string, length, pad) is unpacked from a
    /// possible constant column. The three resulting "is const" flags are turned
    /// into compile-time booleans through std::visit, so that execute_utf8 is
    /// instantiated once per combination. The result column is wrapped in a
    /// Nullable whose null map is filled in by execute_utf8.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        DCHECK_GE(arguments.size(), 3);
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
        auto res = ColumnString::create();

        ColumnPtr col[3];
        bool col_const[3];
        for (size_t i = 0; i < 3; ++i) {
            std::tie(col[i], col_const[i]) =
                    unpack_if_const(block.get_by_position(arguments[i]).column);
        }
        auto& null_map_data = null_map->get_data();
        auto& res_offsets = res->get_offsets();
        auto& res_chars = res->get_chars();
        res_offsets.resize(input_rows_count);

        // Argument 0: the string to pad.
        const auto* strcol = assert_cast<const ColumnString*>(col[0].get());
        const auto& strcol_offsets = strcol->get_offsets();
        const auto& strcol_chars = strcol->get_chars();

        // Argument 1: the requested length.
        const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get());
        const auto& col_len_data = col_len->get_data();

        // Argument 2: the pad string.
        const auto* padcol = assert_cast<const ColumnString*>(col[2].get());
        const auto& padcol_offsets = padcol->get_offsets();
        const auto& padcol_chars = padcol->get_chars();

        std::visit(
                [&](auto str_const, auto len_const, auto pad_const) {
                    execute_utf8<str_const, len_const, pad_const>(
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
                },
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
                make_bool_variant(col_const[2]));

        block.get_by_position(result).column =
                ColumnNullable::create(std::move(res), std::move(null_map));
        return Status::OK();
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1508
435
                        uint32_t result, size_t input_rows_count) const override {
1509
435
        DCHECK_GE(arguments.size(), 3);
1510
435
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1511
        // we create a zero column to simply implement
1512
435
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
1513
435
        auto res = ColumnString::create();
1514
1515
435
        ColumnPtr col[3];
1516
435
        bool col_const[3];
1517
1.74k
        for (size_t i = 0; i < 3; ++i) {
1518
1.30k
            std::tie(col[i], col_const[i]) =
1519
1.30k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1520
1.30k
        }
1521
435
        auto& null_map_data = null_map->get_data();
1522
435
        auto& res_offsets = res->get_offsets();
1523
435
        auto& res_chars = res->get_chars();
1524
435
        res_offsets.resize(input_rows_count);
1525
1526
435
        const auto* strcol = assert_cast<const ColumnString*>(col[0].get());
1527
435
        const auto& strcol_offsets = strcol->get_offsets();
1528
435
        const auto& strcol_chars = strcol->get_chars();
1529
1530
435
        const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get());
1531
435
        const auto& col_len_data = col_len->get_data();
1532
1533
435
        const auto* padcol = assert_cast<const ColumnString*>(col[2].get());
1534
435
        const auto& padcol_offsets = padcol->get_offsets();
1535
435
        const auto& padcol_chars = padcol->get_chars();
1536
435
        std::visit(
1537
435
                [&](auto str_const, auto len_const, auto pad_const) {
1538
435
                    execute_utf8<str_const, len_const, pad_const>(
1539
435
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
435
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
435
                },
1542
435
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
1543
435
                make_bool_variant(col_const[2]));
1544
1545
435
        block.get_by_position(result).column =
1546
435
                ColumnNullable::create(std::move(res), std::move(null_map));
1547
435
        return Status::OK();
1548
435
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1508
435
                        uint32_t result, size_t input_rows_count) const override {
1509
435
        DCHECK_GE(arguments.size(), 3);
1510
435
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1511
        // we create a zero column to simply implement
1512
435
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
1513
435
        auto res = ColumnString::create();
1514
1515
435
        ColumnPtr col[3];
1516
435
        bool col_const[3];
1517
1.74k
        for (size_t i = 0; i < 3; ++i) {
1518
1.30k
            std::tie(col[i], col_const[i]) =
1519
1.30k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
1520
1.30k
        }
1521
435
        auto& null_map_data = null_map->get_data();
1522
435
        auto& res_offsets = res->get_offsets();
1523
435
        auto& res_chars = res->get_chars();
1524
435
        res_offsets.resize(input_rows_count);
1525
1526
435
        const auto* strcol = assert_cast<const ColumnString*>(col[0].get());
1527
435
        const auto& strcol_offsets = strcol->get_offsets();
1528
435
        const auto& strcol_chars = strcol->get_chars();
1529
1530
435
        const auto* col_len = assert_cast<const ColumnInt32*>(col[1].get());
1531
435
        const auto& col_len_data = col_len->get_data();
1532
1533
435
        const auto* padcol = assert_cast<const ColumnString*>(col[2].get());
1534
435
        const auto& padcol_offsets = padcol->get_offsets();
1535
435
        const auto& padcol_chars = padcol->get_chars();
1536
435
        std::visit(
1537
435
                [&](auto str_const, auto len_const, auto pad_const) {
1538
435
                    execute_utf8<str_const, len_const, pad_const>(
1539
435
                            strcol_offsets, strcol_chars, col_len_data, padcol_offsets,
1540
435
                            padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count);
1541
435
                },
1542
435
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
1543
435
                make_bool_variant(col_const[2]));
1544
1545
435
        block.get_by_position(result).column =
1546
435
                ColumnNullable::create(std::move(res), std::move(null_map));
1547
435
        return Status::OK();
1548
435
    }
1549
1550
    template <bool str_const, bool len_const, bool pad_const>
1551
    void execute_utf8(const ColumnString::Offsets& strcol_offsets,
1552
                      const ColumnString::Chars& strcol_chars,
1553
                      const ColumnInt32::Container& col_len_data,
1554
                      const ColumnString::Offsets& padcol_offsets,
1555
                      const ColumnString::Chars& padcol_chars, ColumnString::Offsets& res_offsets,
1556
                      ColumnString::Chars& res_chars, ColumnUInt8::Container& null_map_data,
1557
870
                      size_t input_rows_count) const {
1558
870
        std::vector<size_t> pad_index;
1559
870
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
870
        if constexpr (pad_const) {
1563
372
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
372
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
372
        }
1566
1567
870
        fmt::memory_buffer buffer;
1568
870
        buffer.resize(strcol_chars.size());
1569
870
        size_t buffer_len = 0;
1570
1571
1.93k
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
1.06k
            if constexpr (!pad_const) {
1573
691
                pad_index.clear();
1574
691
            }
1575
1.06k
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
1.06k
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
548
                null_map_data[i] = true;
1579
548
                res_offsets[i] = buffer_len;
1580
548
            } else {
1581
515
                const auto str_idx = index_check_const<str_const>(i);
1582
515
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
515
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
515
                const auto pad_idx = index_check_const<pad_const>(i);
1585
515
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
515
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
515
                auto [iterate_byte_len, iterate_char_len] =
1589
515
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
515
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
515
                if (iterate_char_len == len) {
1593
471
                    buffer.resize(buffer_len + iterate_byte_len);
1594
471
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
471
                    buffer_len += iterate_byte_len;
1596
471
                    res_offsets[i] = buffer_len;
1597
471
                    continue;
1598
471
                }
1599
44
                size_t pad_char_size;
1600
44
                if constexpr (!pad_const) {
1601
32
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
32
                                                                         pad_len, pad_index);
1603
32
                } else {
1604
12
                    pad_char_size = const_pad_char_size;
1605
12
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
44
                if (pad_char_size == 0) {
1609
6
                    res_offsets[i] = buffer_len;
1610
6
                    continue;
1611
6
                }
1612
38
                const size_t str_char_size = iterate_char_len;
1613
38
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
38
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
38
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
38
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
38
                buffer.resize(buffer_len + new_capacity);
1618
38
                if constexpr (!Impl::is_lpad) {
1619
19
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
19
                    buffer_len += str_len;
1621
19
                }
1622
                // Prepend chars of pad.
1623
38
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
38
                                      pad_times);
1625
38
                buffer_len += pad_times * pad_len;
1626
1627
38
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
38
                buffer_len += pad_remainder_len;
1629
1630
38
                if constexpr (Impl::is_lpad) {
1631
19
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
19
                    buffer_len += str_len;
1633
19
                }
1634
38
                res_offsets[i] = buffer_len;
1635
38
            }
1636
1.06k
        }
1637
870
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
870
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
63
                      size_t input_rows_count) const {
1558
63
        std::vector<size_t> pad_index;
1559
63
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
63
        fmt::memory_buffer buffer;
1568
63
        buffer.resize(strcol_chars.size());
1569
63
        size_t buffer_len = 0;
1570
1571
223
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
160
            if constexpr (!pad_const) {
1573
160
                pad_index.clear();
1574
160
            }
1575
160
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
160
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
82
                null_map_data[i] = true;
1579
82
                res_offsets[i] = buffer_len;
1580
82
            } else {
1581
78
                const auto str_idx = index_check_const<str_const>(i);
1582
78
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
78
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
78
                const auto pad_idx = index_check_const<pad_const>(i);
1585
78
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
78
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
78
                auto [iterate_byte_len, iterate_char_len] =
1589
78
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
78
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
78
                if (iterate_char_len == len) {
1593
68
                    buffer.resize(buffer_len + iterate_byte_len);
1594
68
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
68
                    buffer_len += iterate_byte_len;
1596
68
                    res_offsets[i] = buffer_len;
1597
68
                    continue;
1598
68
                }
1599
10
                size_t pad_char_size;
1600
10
                if constexpr (!pad_const) {
1601
10
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
10
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
10
                if (pad_char_size == 0) {
1609
3
                    res_offsets[i] = buffer_len;
1610
3
                    continue;
1611
3
                }
1612
7
                const size_t str_char_size = iterate_char_len;
1613
7
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
7
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
7
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
7
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
7
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
7
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
7
                                      pad_times);
1625
7
                buffer_len += pad_times * pad_len;
1626
1627
7
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
7
                buffer_len += pad_remainder_len;
1629
1630
7
                if constexpr (Impl::is_lpad) {
1631
7
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
7
                    buffer_len += str_len;
1633
7
                }
1634
7
                res_offsets[i] = buffer_len;
1635
7
            }
1636
160
        }
1637
63
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
63
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
                if constexpr (!Impl::is_lpad) {
1619
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
                    buffer_len += str_len;
1621
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
2
                if constexpr (Impl::is_lpad) {
1631
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
2
                    buffer_len += str_len;
1633
2
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
Unexecuted instantiation: _ZNK5doris17FunctionStringPadINS_10StringLPadEE12execute_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
63
                      size_t input_rows_count) const {
1558
63
        std::vector<size_t> pad_index;
1559
63
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
63
        fmt::memory_buffer buffer;
1568
63
        buffer.resize(strcol_chars.size());
1569
63
        size_t buffer_len = 0;
1570
1571
222
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
159
            if constexpr (!pad_const) {
1573
159
                pad_index.clear();
1574
159
            }
1575
159
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
159
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
82
                null_map_data[i] = true;
1579
82
                res_offsets[i] = buffer_len;
1580
82
            } else {
1581
77
                const auto str_idx = index_check_const<str_const>(i);
1582
77
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
77
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
77
                const auto pad_idx = index_check_const<pad_const>(i);
1585
77
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
77
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
77
                auto [iterate_byte_len, iterate_char_len] =
1589
77
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
77
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
77
                if (iterate_char_len == len) {
1593
67
                    buffer.resize(buffer_len + iterate_byte_len);
1594
67
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
67
                    buffer_len += iterate_byte_len;
1596
67
                    res_offsets[i] = buffer_len;
1597
67
                    continue;
1598
67
                }
1599
10
                size_t pad_char_size;
1600
10
                if constexpr (!pad_const) {
1601
10
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
10
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
10
                if (pad_char_size == 0) {
1609
3
                    res_offsets[i] = buffer_len;
1610
3
                    continue;
1611
3
                }
1612
7
                const size_t str_char_size = iterate_char_len;
1613
7
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
7
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
7
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
7
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
7
                buffer.resize(buffer_len + new_capacity);
1618
7
                if constexpr (!Impl::is_lpad) {
1619
7
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
7
                    buffer_len += str_len;
1621
7
                }
1622
                // Prepend chars of pad.
1623
7
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
7
                                      pad_times);
1625
7
                buffer_len += pad_times * pad_len;
1626
1627
7
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
7
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
7
                res_offsets[i] = buffer_len;
1635
7
            }
1636
159
        }
1637
63
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
63
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb0ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb0ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb0ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
62
        if constexpr (pad_const) {
1563
62
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
62
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
62
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
            if constexpr (!pad_const) {
1573
                pad_index.clear();
1574
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
                if constexpr (!pad_const) {
1601
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
                                                                         pad_len, pad_index);
1603
2
                } else {
1604
2
                    pad_char_size = const_pad_char_size;
1605
2
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
_ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb1ELb0EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
Line
Count
Source
1557
62
                      size_t input_rows_count) const {
1558
62
        std::vector<size_t> pad_index;
1559
62
        size_t const_pad_char_size = 0;
1560
        // If pad_const = true, initialize pad_index only once.
1561
        // The same logic applies to the if constexpr (!pad_const) condition below.
1562
        if constexpr (pad_const) {
1563
            const_pad_char_size = simd::VStringFunctions::get_char_len(
1564
                    (const char*)padcol_chars.data(), padcol_offsets[0], pad_index);
1565
        }
1566
1567
62
        fmt::memory_buffer buffer;
1568
62
        buffer.resize(strcol_chars.size());
1569
62
        size_t buffer_len = 0;
1570
1571
124
        for (size_t i = 0; i < input_rows_count; ++i) {
1572
62
            if constexpr (!pad_const) {
1573
62
                pad_index.clear();
1574
62
            }
1575
62
            const auto len = col_len_data[index_check_const<len_const>(i)];
1576
62
            if (len < 0) {
1577
                // return NULL when input length is invalid number
1578
32
                null_map_data[i] = true;
1579
32
                res_offsets[i] = buffer_len;
1580
32
            } else {
1581
30
                const auto str_idx = index_check_const<str_const>(i);
1582
30
                const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1];
1583
30
                const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]];
1584
30
                const auto pad_idx = index_check_const<pad_const>(i);
1585
30
                const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1];
1586
30
                const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]];
1587
1588
30
                auto [iterate_byte_len, iterate_char_len] =
1589
30
                        simd::VStringFunctions::iterate_utf8_with_limit_length(
1590
30
                                (const char*)str_data, (const char*)str_data + str_len, len);
1591
                // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len
1592
30
                if (iterate_char_len == len) {
1593
28
                    buffer.resize(buffer_len + iterate_byte_len);
1594
28
                    memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len);
1595
28
                    buffer_len += iterate_byte_len;
1596
28
                    res_offsets[i] = buffer_len;
1597
28
                    continue;
1598
28
                }
1599
2
                size_t pad_char_size;
1600
2
                if constexpr (!pad_const) {
1601
2
                    pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data,
1602
2
                                                                         pad_len, pad_index);
1603
                } else {
1604
                    pad_char_size = const_pad_char_size;
1605
                }
1606
1607
                // make compatible with mysql. return empty string if pad is empty
1608
2
                if (pad_char_size == 0) {
1609
0
                    res_offsets[i] = buffer_len;
1610
0
                    continue;
1611
0
                }
1612
2
                const size_t str_char_size = iterate_char_len;
1613
2
                const size_t pad_times = (len - str_char_size) / pad_char_size;
1614
2
                const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size];
1615
2
                const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len;
1616
2
                ColumnString::check_chars_length(buffer_len + new_capacity, i);
1617
2
                buffer.resize(buffer_len + new_capacity);
1618
2
                if constexpr (!Impl::is_lpad) {
1619
2
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1620
2
                    buffer_len += str_len;
1621
2
                }
1622
                // Prepend chars of pad.
1623
2
                StringOP::fast_repeat((uint8_t*)buffer.data() + buffer_len, pad_data, pad_len,
1624
2
                                      pad_times);
1625
2
                buffer_len += pad_times * pad_len;
1626
1627
2
                memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len);
1628
2
                buffer_len += pad_remainder_len;
1629
1630
                if constexpr (Impl::is_lpad) {
1631
                    memcpy(buffer.data() + buffer_len, str_data, str_len);
1632
                    buffer_len += str_len;
1633
                }
1634
2
                res_offsets[i] = buffer_len;
1635
2
            }
1636
62
        }
1637
62
        res_chars.insert(buffer.data(), buffer.data() + buffer_len);
1638
62
    }
Unexecuted instantiation: _ZNK5doris17FunctionStringPadINS_10StringRPadEE12execute_utf8ILb1ELb1ELb1EEEvRKNS_8PODArrayIjLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IhLm4096ES7_Lm16ELm15EEERKNS4_IiLm4096ES7_Lm16ELm15EEESA_SD_RS8_RSB_SI_m
1639
};
1640
1641
template <typename Impl>
1642
class FunctionStringFormatRound : public IFunction {
1643
public:
1644
    static constexpr auto name = "format_round";
1645
79
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE6createEv
Line
Count
Source
1645
9
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE6createEv
Line
Count
Source
1645
9
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE6createEv
Line
Count
Source
1645
9
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE6createEv
Line
Count
Source
1645
9
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE6createEv
Line
Count
Source
1645
8
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE6createEv
Line
Count
Source
1645
19
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE6createEv
Line
Count
Source
1645
8
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
_ZN5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE6createEv
Line
Count
Source
1645
8
    static FunctionPtr create() { return std::make_shared<FunctionStringFormatRound>(); }
1646
8
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE8get_nameB5cxx11Ev
Line
Count
Source
1646
1
    String get_name() const override { return name; }
1647
1648
15
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
15
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
15
        return std::make_shared<DataTypeString>();
1654
15
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1648
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
1
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
1
        return std::make_shared<DataTypeString>();
1654
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1648
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
1
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
1
        return std::make_shared<DataTypeString>();
1654
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
1648
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
1
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
1
        return std::make_shared<DataTypeString>();
1654
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
1648
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
1
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
1
        return std::make_shared<DataTypeString>();
1654
1
    }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
1648
11
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1649
11
        if (arguments.size() != 2) {
1650
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
1651
0
                                   "Function {} requires exactly 2 argument", name);
1652
0
        }
1653
11
        return std::make_shared<DataTypeString>();
1654
11
    }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
1655
56
    DataTypes get_variadic_argument_types_impl() const override {
1656
56
        return Impl::get_variadic_argument_types();
1657
56
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
7
    DataTypes get_variadic_argument_types_impl() const override {
1656
7
        return Impl::get_variadic_argument_types();
1657
7
    }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
7
    DataTypes get_variadic_argument_types_impl() const override {
1656
7
        return Impl::get_variadic_argument_types();
1657
7
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
7
    DataTypes get_variadic_argument_types_impl() const override {
1656
7
        return Impl::get_variadic_argument_types();
1657
7
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
7
    DataTypes get_variadic_argument_types_impl() const override {
1656
7
        return Impl::get_variadic_argument_types();
1657
7
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
7
    DataTypes get_variadic_argument_types_impl() const override {
1656
7
        return Impl::get_variadic_argument_types();
1657
7
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
7
    DataTypes get_variadic_argument_types_impl() const override {
1656
7
        return Impl::get_variadic_argument_types();
1657
7
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
7
    DataTypes get_variadic_argument_types_impl() const override {
1656
7
        return Impl::get_variadic_argument_types();
1657
7
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE32get_variadic_argument_types_implEv
Line
Count
Source
1655
7
    DataTypes get_variadic_argument_types_impl() const override {
1656
7
        return Impl::get_variadic_argument_types();
1657
7
    }
1658
15
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE23get_number_of_argumentsEv
Line
Count
Source
1658
1
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE23get_number_of_argumentsEv
Line
Count
Source
1658
1
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE23get_number_of_argumentsEv
Line
Count
Source
1658
1
    size_t get_number_of_arguments() const override { return 2; }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE23get_number_of_argumentsEv
Line
Count
Source
1658
1
    size_t get_number_of_arguments() const override { return 2; }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE23get_number_of_argumentsEv
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE23get_number_of_argumentsEv
Line
Count
Source
1658
11
    size_t get_number_of_arguments() const override { return 2; }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE23get_number_of_argumentsEv
1659
1660
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1661
12
                        uint32_t result, size_t input_rows_count) const override {
1662
12
        auto res_column = ColumnString::create();
1663
12
        ColumnPtr argument_column =
1664
12
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
12
        ColumnPtr argument_column_2;
1666
12
        bool is_const;
1667
12
        std::tie(argument_column_2, is_const) =
1668
12
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
12
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
12
        if (is_const) {
1672
2
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
2
                                                         argument_column_2, input_rows_count));
1674
10
        } else {
1675
10
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
10
                                                          argument_column_2, input_rows_count));
1677
10
        }
1678
1679
12
        block.replace_by_position(result, std::move(res_column));
1680
12
        return Status::OK();
1681
12
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundDoubleImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
1
                        uint32_t result, size_t input_rows_count) const override {
1662
1
        auto res_column = ColumnString::create();
1663
1
        ColumnPtr argument_column =
1664
1
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
1
        ColumnPtr argument_column_2;
1666
1
        bool is_const;
1667
1
        std::tie(argument_column_2, is_const) =
1668
1
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
1
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
1
        if (is_const) {
1672
0
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
0
                                                         argument_column_2, input_rows_count));
1674
1
        } else {
1675
1
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
1
                                                          argument_column_2, input_rows_count));
1677
1
        }
1678
1679
1
        block.replace_by_position(result, std::move(res_column));
1680
1
        return Status::OK();
1681
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_20FormatRoundInt64ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
1
                        uint32_t result, size_t input_rows_count) const override {
1662
1
        auto res_column = ColumnString::create();
1663
1
        ColumnPtr argument_column =
1664
1
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
1
        ColumnPtr argument_column_2;
1666
1
        bool is_const;
1667
1
        std::tie(argument_column_2, is_const) =
1668
1
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
1
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
1
        if (is_const) {
1672
0
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
0
                                                         argument_column_2, input_rows_count));
1674
1
        } else {
1675
1
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
1
                                                          argument_column_2, input_rows_count));
1677
1
        }
1678
1679
1
        block.replace_by_position(result, std::move(res_column));
1680
1
        return Status::OK();
1681
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_21FormatRoundInt128ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
1
                        uint32_t result, size_t input_rows_count) const override {
1662
1
        auto res_column = ColumnString::create();
1663
1
        ColumnPtr argument_column =
1664
1
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
1
        ColumnPtr argument_column_2;
1666
1
        bool is_const;
1667
1
        std::tie(argument_column_2, is_const) =
1668
1
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
1
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
1
        if (is_const) {
1672
0
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
0
                                                         argument_column_2, input_rows_count));
1674
1
        } else {
1675
1
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
1
                                                          argument_column_2, input_rows_count));
1677
1
        }
1678
1679
1
        block.replace_by_position(result, std::move(res_column));
1680
1
        return Status::OK();
1681
1
    }
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
1
                        uint32_t result, size_t input_rows_count) const override {
1662
1
        auto res_column = ColumnString::create();
1663
1
        ColumnPtr argument_column =
1664
1
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
1
        ColumnPtr argument_column_2;
1666
1
        bool is_const;
1667
1
        std::tie(argument_column_2, is_const) =
1668
1
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
1
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
1
        if (is_const) {
1672
0
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
0
                                                         argument_column_2, input_rows_count));
1674
1
        } else {
1675
1
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
1
                                                          argument_column_2, input_rows_count));
1677
1
        }
1678
1679
1
        block.replace_by_position(result, std::move(res_column));
1680
1
        return Status::OK();
1681
1
    }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
_ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
1661
8
                        uint32_t result, size_t input_rows_count) const override {
1662
8
        auto res_column = ColumnString::create();
1663
8
        ColumnPtr argument_column =
1664
8
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
1665
8
        ColumnPtr argument_column_2;
1666
8
        bool is_const;
1667
8
        std::tie(argument_column_2, is_const) =
1668
8
                unpack_if_const(block.get_by_position(arguments[1]).column);
1669
8
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
1670
1671
8
        if (is_const) {
1672
2
            RETURN_IF_ERROR(Impl::template execute<true>(context, result_column, argument_column,
1673
2
                                                         argument_column_2, input_rows_count));
1674
6
        } else {
1675
6
            RETURN_IF_ERROR(Impl::template execute<false>(context, result_column, argument_column,
1676
6
                                                          argument_column_2, input_rows_count));
1677
6
        }
1678
1679
8
        block.replace_by_position(result, std::move(res_column));
1680
8
        return Status::OK();
1681
8
    }
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris25FunctionStringFormatRoundINS_22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
1682
};
1683
1684
class FunctionSplitPart : public IFunction {
1685
public:
1686
    static constexpr auto name = "split_part";
1687
8
    static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); }
1688
1
    String get_name() const override { return name; }
1689
0
    size_t get_number_of_arguments() const override { return 3; }
1690
1691
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1692
0
        return make_nullable(std::make_shared<DataTypeString>());
1693
0
    }
1694
1695
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1696
0
                        uint32_t result, size_t input_rows_count) const override {
1697
0
        DCHECK_EQ(arguments.size(), 3);
1698
1699
0
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
1700
        // Create a zero column to simply implement
1701
0
        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
1702
0
        auto res = ColumnString::create();
1703
1704
0
        auto& null_map_data = null_map->get_data();
1705
0
        auto& res_offsets = res->get_offsets();
1706
0
        auto& res_chars = res->get_chars();
1707
0
        res_offsets.resize(input_rows_count);
1708
1709
0
        const size_t argument_size = arguments.size();
1710
0
        std::vector<ColumnPtr> argument_columns(argument_size);
1711
0
        for (size_t i = 0; i < argument_size; ++i) {
1712
0
            argument_columns[i] =
1713
0
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
1714
0
            if (const auto* nullable =
1715
0
                        check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
1716
                // Danger: Here must dispose the null map data first! Because
1717
                // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem
1718
                // of column nullable mem of null map
1719
0
                VectorizedUtils::update_null_map(null_map->get_data(),
1720
0
                                                 nullable->get_null_map_data());
1721
0
                argument_columns[i] = nullable->get_nested_column_ptr();
1722
0
            }
1723
0
        }
1724
1725
0
        const auto* str_col = assert_cast<const ColumnString*>(argument_columns[0].get());
1726
1727
0
        const auto* delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get());
1728
1729
0
        const auto* part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get());
1730
0
        const auto& part_num_col_data = part_num_col->get_data();
1731
1732
0
        for (size_t i = 0; i < input_rows_count; ++i) {
1733
0
            if (part_num_col_data[i] == 0) {
1734
0
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1735
0
                continue;
1736
0
            }
1737
1738
0
            auto delimiter = delimiter_col->get_data_at(i);
1739
0
            auto delimiter_str = delimiter_col->get_data_at(i).to_string();
1740
0
            auto part_number = part_num_col_data[i];
1741
0
            auto str = str_col->get_data_at(i);
1742
0
            if (delimiter.size == 0) {
1743
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
1744
0
                continue;
1745
0
            }
1746
1747
0
            if (part_number > 0) {
1748
0
                if (delimiter.size == 1) {
1749
                    // If delimiter is a char, use memchr to split
1750
0
                    int32_t pre_offset = -1;
1751
0
                    int32_t offset = -1;
1752
0
                    int32_t num = 0;
1753
0
                    while (num < part_number) {
1754
0
                        pre_offset = offset;
1755
0
                        size_t n = str.size - offset - 1;
1756
0
                        const char* pos = reinterpret_cast<const char*>(
1757
0
                                memchr(str.data + offset + 1, delimiter_str[0], n));
1758
0
                        if (pos != nullptr) {
1759
0
                            offset = pos - str.data;
1760
0
                            num++;
1761
0
                        } else {
1762
0
                            offset = str.size;
1763
0
                            num = (num == 0) ? 0 : num + 1;
1764
0
                            break;
1765
0
                        }
1766
0
                    }
1767
1768
0
                    if (num == part_number) {
1769
0
                        StringOP::push_value_string(
1770
0
                                std::string_view {
1771
0
                                        reinterpret_cast<const char*>(str.data + pre_offset + 1),
1772
0
                                        (size_t)offset - pre_offset - 1},
1773
0
                                i, res_chars, res_offsets);
1774
0
                    } else {
1775
0
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1776
0
                    }
1777
0
                } else {
1778
                    // If delimiter is a string, use memmem to split
1779
0
                    int32_t pre_offset = -delimiter.size;
1780
0
                    int32_t offset = -delimiter.size;
1781
0
                    int32_t num = 0;
1782
0
                    while (num < part_number) {
1783
0
                        pre_offset = offset;
1784
0
                        size_t n = str.size - offset - delimiter.size;
1785
0
                        char* pos =
1786
0
                                reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size,
1787
0
                                                               n, delimiter.data, delimiter.size));
1788
0
                        if (pos != nullptr) {
1789
0
                            offset = pos - str.data;
1790
0
                            num++;
1791
0
                        } else {
1792
0
                            offset = str.size;
1793
0
                            num = (num == 0) ? 0 : num + 1;
1794
0
                            break;
1795
0
                        }
1796
0
                    }
1797
1798
0
                    if (num == part_number) {
1799
0
                        StringOP::push_value_string(
1800
0
                                std::string_view {reinterpret_cast<const char*>(
1801
0
                                                          str.data + pre_offset + delimiter.size),
1802
0
                                                  (size_t)offset - pre_offset - delimiter.size},
1803
0
                                i, res_chars, res_offsets);
1804
0
                    } else {
1805
0
                        StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1806
0
                    }
1807
0
                }
1808
0
            } else {
1809
0
                part_number = -part_number;
1810
0
                auto str_str = str.to_string();
1811
0
                int32_t offset = str.size;
1812
0
                int32_t pre_offset = offset;
1813
0
                int32_t num = 0;
1814
0
                auto substr = str_str;
1815
0
                while (num <= part_number && offset >= 0) {
1816
0
                    offset = (int)substr.rfind(delimiter, offset);
1817
0
                    if (offset != -1) {
1818
0
                        if (++num == part_number) {
1819
0
                            break;
1820
0
                        }
1821
0
                        pre_offset = offset;
1822
0
                        offset = offset - 1;
1823
0
                        substr = str_str.substr(0, pre_offset);
1824
0
                    } else {
1825
0
                        break;
1826
0
                    }
1827
0
                }
1828
0
                num = (offset == -1 && num != 0) ? num + 1 : num;
1829
1830
0
                if (num == part_number) {
1831
0
                    if (offset == -1) {
1832
0
                        StringOP::push_value_string(
1833
0
                                std::string_view {reinterpret_cast<const char*>(str.data),
1834
0
                                                  (size_t)pre_offset},
1835
0
                                i, res_chars, res_offsets);
1836
0
                    } else {
1837
0
                        StringOP::push_value_string(
1838
0
                                std::string_view {str_str.substr(
1839
0
                                        offset + delimiter.size,
1840
0
                                        (size_t)pre_offset - offset - delimiter.size)},
1841
0
                                i, res_chars, res_offsets);
1842
0
                    }
1843
0
                } else {
1844
0
                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
1845
0
                }
1846
0
            }
1847
0
        }
1848
1849
0
        block.get_by_position(result).column =
1850
0
                ColumnNullable::create(std::move(res), std::move(null_map));
1851
0
        return Status::OK();
1852
0
    }
1853
};
1854
1855
class FunctionSubstringIndex : public IFunction {
1856
public:
1857
    static constexpr auto name = "substring_index";
1858
8
    static FunctionPtr create() { return std::make_shared<FunctionSubstringIndex>(); }
1859
1
    String get_name() const override { return name; }
1860
0
    size_t get_number_of_arguments() const override { return 3; }
1861
1862
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
1863
0
        return std::make_shared<DataTypeString>();
1864
0
    }
1865
1866
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
1867
0
                        uint32_t result, size_t input_rows_count) const override {
1868
0
        DCHECK_EQ(arguments.size(), 3);
1869
1870
        // Create a zero column to simply implement
1871
0
        auto res = ColumnString::create();
1872
1873
0
        auto& res_offsets = res->get_offsets();
1874
0
        auto& res_chars = res->get_chars();
1875
0
        res_offsets.resize(input_rows_count);
1876
0
        ColumnPtr content_column;
1877
0
        bool content_const = false;
1878
0
        std::tie(content_column, content_const) =
1879
0
                unpack_if_const(block.get_by_position(arguments[0]).column);
1880
1881
0
        const auto* str_col = assert_cast<const ColumnString*>(content_column.get());
1882
1883
        // Handle both constant and non-constant delimiter parameters
1884
0
        ColumnPtr delimiter_column_ptr;
1885
0
        bool delimiter_const = false;
1886
0
        std::tie(delimiter_column_ptr, delimiter_const) =
1887
0
                unpack_if_const(block.get_by_position(arguments[1]).column);
1888
0
        const auto* delimiter_col = assert_cast<const ColumnString*>(delimiter_column_ptr.get());
1889
1890
0
        ColumnPtr part_num_column_ptr;
1891
0
        bool part_num_const = false;
1892
0
        std::tie(part_num_column_ptr, part_num_const) =
1893
0
                unpack_if_const(block.get_by_position(arguments[2]).column);
1894
0
        const ColumnInt32* part_num_col =
1895
0
                assert_cast<const ColumnInt32*>(part_num_column_ptr.get());
1896
1897
        // For constant multi-character delimiters, create StringRef and StringSearch only once
1898
0
        std::optional<StringRef> const_delimiter_ref;
1899
0
        std::optional<StringSearch> const_search;
1900
0
        if (delimiter_const && delimiter_col->get_data_at(0).size > 1) {
1901
0
            const_delimiter_ref.emplace(delimiter_col->get_data_at(0));
1902
0
            const_search.emplace(&const_delimiter_ref.value());
1903
0
        }
1904
1905
0
        for (size_t i = 0; i < input_rows_count; ++i) {
1906
0
            auto str = str_col->get_data_at(content_const ? 0 : i);
1907
0
            auto delimiter = delimiter_col->get_data_at(delimiter_const ? 0 : i);
1908
0
            int32_t delimiter_size = delimiter.size;
1909
1910
0
            auto part_number = part_num_col->get_element(part_num_const ? 0 : i);
1911
1912
0
            if (part_number == 0 || delimiter_size == 0) {
1913
0
                StringOP::push_empty_string(i, res_chars, res_offsets);
1914
0
                continue;
1915
0
            }
1916
1917
0
            if (part_number > 0) {
1918
0
                if (delimiter_size == 1) {
1919
0
                    int32_t offset = -1;
1920
0
                    int32_t num = 0;
1921
0
                    while (num < part_number) {
1922
0
                        size_t n = str.size - offset - 1;
1923
0
                        const char* pos = reinterpret_cast<const char*>(
1924
0
                                memchr(str.data + offset + 1, delimiter.data[0], n));
1925
0
                        if (pos != nullptr) {
1926
0
                            offset = pos - str.data;
1927
0
                            num++;
1928
0
                        } else {
1929
0
                            offset = str.size;
1930
0
                            num = (num == 0) ? 0 : num + 1;
1931
0
                            break;
1932
0
                        }
1933
0
                    }
1934
1935
0
                    if (num == part_number) {
1936
0
                        StringOP::push_value_string(
1937
0
                                std::string_view {reinterpret_cast<const char*>(str.data),
1938
0
                                                  (size_t)offset},
1939
0
                                i, res_chars, res_offsets);
1940
0
                    } else {
1941
0
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
1942
0
                                                    res_chars, res_offsets);
1943
0
                    }
1944
0
                } else {
1945
                    // For multi-character delimiters
1946
                    // Use pre-created StringRef and StringSearch for constant delimiters
1947
0
                    StringRef delimiter_ref = const_delimiter_ref ? const_delimiter_ref.value()
1948
0
                                                                  : StringRef(delimiter);
1949
0
                    const StringSearch* search_ptr = const_search ? &const_search.value() : nullptr;
1950
0
                    StringSearch local_search(&delimiter_ref);
1951
0
                    if (!search_ptr) {
1952
0
                        search_ptr = &local_search;
1953
0
                    }
1954
1955
0
                    int32_t offset = -delimiter_size;
1956
0
                    int32_t num = 0;
1957
0
                    while (num < part_number) {
1958
0
                        size_t n = str.size - offset - delimiter_size;
1959
                        // search first match delimter_ref index from src string among str_offset to end
1960
0
                        const char* pos = search_ptr->search(str.data + offset + delimiter_size, n);
1961
0
                        if (pos < str.data + str.size) {
1962
0
                            offset = pos - str.data;
1963
0
                            num++;
1964
0
                        } else {
1965
0
                            offset = str.size;
1966
0
                            num = (num == 0) ? 0 : num + 1;
1967
0
                            break;
1968
0
                        }
1969
0
                    }
1970
1971
0
                    if (num == part_number) {
1972
0
                        StringOP::push_value_string(
1973
0
                                std::string_view {reinterpret_cast<const char*>(str.data),
1974
0
                                                  (size_t)offset},
1975
0
                                i, res_chars, res_offsets);
1976
0
                    } else {
1977
0
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
1978
0
                                                    res_chars, res_offsets);
1979
0
                    }
1980
0
                }
1981
0
            } else {
1982
0
                int neg_part_number = -part_number;
1983
0
                auto str_str = str.to_string();
1984
0
                int32_t offset = str.size;
1985
0
                int32_t pre_offset = offset;
1986
0
                int32_t num = 0;
1987
0
                auto substr = str_str;
1988
1989
                // Use pre-created StringRef for constant delimiters
1990
0
                StringRef delimiter_str =
1991
0
                        const_delimiter_ref
1992
0
                                ? const_delimiter_ref.value()
1993
0
                                : StringRef(reinterpret_cast<const char*>(delimiter.data),
1994
0
                                            delimiter.size);
1995
1996
0
                while (num <= neg_part_number && offset >= 0) {
1997
0
                    offset = (int)substr.rfind(delimiter_str, offset);
1998
0
                    if (offset != -1) {
1999
0
                        if (++num == neg_part_number) {
2000
0
                            break;
2001
0
                        }
2002
0
                        pre_offset = offset;
2003
0
                        offset = offset - 1;
2004
0
                        substr = str_str.substr(0, pre_offset);
2005
0
                    } else {
2006
0
                        break;
2007
0
                    }
2008
0
                }
2009
0
                num = (offset == -1 && num != 0) ? num + 1 : num;
2010
2011
0
                if (num == neg_part_number) {
2012
0
                    if (offset == -1) {
2013
0
                        StringOP::push_value_string(std::string_view(str.data, str.size), i,
2014
0
                                                    res_chars, res_offsets);
2015
0
                    } else {
2016
0
                        StringOP::push_value_string(
2017
0
                                std::string_view {str.data + offset + delimiter_size,
2018
0
                                                  str.size - offset - delimiter_size},
2019
0
                                i, res_chars, res_offsets);
2020
0
                    }
2021
0
                } else {
2022
0
                    StringOP::push_value_string(std::string_view(str.data, str.size), i, res_chars,
2023
0
                                                res_offsets);
2024
0
                }
2025
0
            }
2026
0
        }
2027
2028
0
        block.get_by_position(result).column = std::move(res);
2029
0
        return Status::OK();
2030
0
    }
2031
};
2032
2033
class FunctionSplitByString : public IFunction {
2034
public:
2035
    static constexpr auto name = "split_by_string";
2036
2037
8
    static FunctionPtr create() { return std::make_shared<FunctionSplitByString>(); }
2038
    using NullMapType = PaddedPODArray<UInt8>;
2039
2040
1
    String get_name() const override { return name; }
2041
2042
1
    bool is_variadic() const override { return false; }
2043
2044
0
    size_t get_number_of_arguments() const override { return 2; }
2045
2046
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2047
0
        DCHECK(is_string_type(arguments[0]->get_primitive_type()))
2048
0
                << "first argument for function: " << name << " should be string"
2049
0
                << " and arguments[0] is " << arguments[0]->get_name();
2050
0
        DCHECK(is_string_type(arguments[1]->get_primitive_type()))
2051
0
                << "second argument for function: " << name << " should be string"
2052
0
                << " and arguments[1] is " << arguments[1]->get_name();
2053
0
        return std::make_shared<DataTypeArray>(make_nullable(arguments[0]));
2054
0
    }
2055
2056
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
2057
0
                        uint32_t result, size_t input_rows_count) const override {
2058
0
        DCHECK_EQ(arguments.size(), 2);
2059
2060
0
        const auto& [src_column, left_const] =
2061
0
                unpack_if_const(block.get_by_position(arguments[0]).column);
2062
0
        const auto& [right_column, right_const] =
2063
0
                unpack_if_const(block.get_by_position(arguments[1]).column);
2064
2065
0
        DataTypePtr right_column_type = block.get_by_position(arguments[1]).type;
2066
0
        DataTypePtr src_column_type = block.get_by_position(arguments[0]).type;
2067
0
        auto dest_column_ptr = ColumnArray::create(make_nullable(src_column_type)->create_column(),
2068
0
                                                   ColumnArray::ColumnOffsets::create());
2069
2070
0
        dest_column_ptr->resize(0);
2071
0
        auto& dest_offsets = dest_column_ptr->get_offsets();
2072
2073
0
        auto& dest_nullable_col = assert_cast<ColumnNullable&>(dest_column_ptr->get_data());
2074
0
        auto* dest_nested_column = dest_nullable_col.get_nested_column_ptr().get();
2075
2076
0
        const auto* col_str = assert_cast<const ColumnString*>(src_column.get());
2077
2078
0
        const auto* col_delimiter = assert_cast<const ColumnString*>(right_column.get());
2079
2080
0
        std::visit(
2081
0
                [&](auto src_const, auto delimiter_const) {
2082
0
                    _execute<src_const, delimiter_const>(*col_str, *col_delimiter,
2083
0
                                                         *dest_nested_column, dest_offsets,
2084
0
                                                         input_rows_count);
2085
0
                },
Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESF_EEDaSA_SB_
Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb0EESE_IbLb1EEEEDaSA_SB_
Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESE_IbLb0EEEEDaSA_SB_
Unexecuted instantiation: _ZZNK5doris21FunctionSplitByString12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_E_clISt17integral_constantIbLb1EESF_EEDaSA_SB_
2086
0
                make_bool_variant(left_const), make_bool_variant(right_const));
2087
2088
        // all elements in dest_nested_column are not null
2089
0
        dest_nullable_col.get_null_map_column().get_data().resize_fill(dest_nested_column->size(),
2090
0
                                                                       false);
2091
0
        block.replace_by_position(result, std::move(dest_column_ptr));
2092
2093
0
        return Status::OK();
2094
0
    }
2095
2096
private:
2097
    template <bool src_const, bool delimiter_const>
2098
    void _execute(const ColumnString& src_column_string, const ColumnString& delimiter_column,
2099
                  IColumn& dest_nested_column, ColumnArray::Offsets64& dest_offsets,
2100
0
                  size_t size) const {
2101
0
        auto& dest_column_string = assert_cast<ColumnString&>(dest_nested_column);
2102
0
        ColumnString::Chars& column_string_chars = dest_column_string.get_chars();
2103
0
        ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets();
2104
0
        column_string_chars.reserve(0);
2105
2106
0
        ColumnArray::Offset64 string_pos = 0;
2107
0
        ColumnArray::Offset64 dest_pos = 0;
2108
2109
0
        StringSearch search;
2110
0
        StringRef delimiter_ref_for_search;
2111
2112
0
        if constexpr (delimiter_const) {
2113
0
            delimiter_ref_for_search = delimiter_column.get_data_at(0);
2114
0
            search.set_pattern(&delimiter_ref_for_search);
2115
0
        }
2116
2117
0
        for (size_t i = 0; i < size; i++) {
2118
0
            const StringRef str_ref =
2119
0
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2120
0
            const StringRef delimiter_ref =
2121
0
                    delimiter_column.get_data_at(index_check_const<delimiter_const>(i));
2122
2123
0
            if (str_ref.size == 0) {
2124
0
                dest_offsets.push_back(dest_pos);
2125
0
                continue;
2126
0
            }
2127
0
            if (delimiter_ref.size == 0) {
2128
0
                split_empty_delimiter(str_ref, column_string_chars, column_string_offsets,
2129
0
                                      string_pos, dest_pos);
2130
0
            } else {
2131
0
                if constexpr (!delimiter_const) {
2132
0
                    search.set_pattern(&delimiter_ref);
2133
0
                }
2134
0
                for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2135
0
                    const size_t str_offset = str_pos;
2136
0
                    const size_t old_size = column_string_chars.size();
2137
                    // search first match delimter_ref index from src string among str_offset to end
2138
0
                    const char* result_start =
2139
0
                            search.search(str_ref.data + str_offset, str_ref.size - str_offset);
2140
                    // compute split part size
2141
0
                    const size_t split_part_size = result_start - str_ref.data - str_offset;
2142
                    // save dist string split part
2143
0
                    if (split_part_size > 0) {
2144
0
                        const size_t new_size = old_size + split_part_size;
2145
0
                        column_string_chars.resize(new_size);
2146
0
                        memcpy_small_allow_read_write_overflow15(
2147
0
                                column_string_chars.data() + old_size, str_ref.data + str_offset,
2148
0
                                split_part_size);
2149
                        // add dist string offset
2150
0
                        string_pos += split_part_size;
2151
0
                    }
2152
0
                    column_string_offsets.push_back(string_pos);
2153
                    // array offset + 1
2154
0
                    dest_pos++;
2155
                    // add src string str_pos to next search start
2156
0
                    str_pos += split_part_size + delimiter_ref.size;
2157
0
                }
2158
0
            }
2159
0
            dest_offsets.push_back(dest_pos);
2160
0
        }
2161
0
    }
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Unexecuted instantiation: _ZNK5doris21FunctionSplitByString8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES5_RNS_7IColumnERNS_8PODArrayImLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
2162
2163
    void split_empty_delimiter(const StringRef& str_ref, ColumnString::Chars& column_string_chars,
2164
                               ColumnString::Offsets& column_string_offsets,
2165
                               ColumnArray::Offset64& string_pos,
2166
0
                               ColumnArray::Offset64& dest_pos) const {
2167
0
        const size_t old_size = column_string_chars.size();
2168
0
        const size_t new_size = old_size + str_ref.size;
2169
0
        column_string_chars.resize(new_size);
2170
0
        memcpy(column_string_chars.data() + old_size, str_ref.data, str_ref.size);
2171
0
        if (simd::VStringFunctions::is_ascii(str_ref)) {
2172
0
            const auto size = str_ref.size;
2173
2174
0
            const auto nested_old_size = column_string_offsets.size();
2175
0
            const auto nested_new_size = nested_old_size + size;
2176
0
            column_string_offsets.resize(nested_new_size);
2177
0
            std::iota(column_string_offsets.data() + nested_old_size,
2178
0
                      column_string_offsets.data() + nested_new_size, string_pos + 1);
2179
2180
0
            string_pos += size;
2181
0
            dest_pos += size;
2182
            // The above code is equivalent to the code in the following comment.
2183
            // for (size_t i = 0; i < str_ref.size; i++) {
2184
            //     string_pos++;
2185
            //     column_string_offsets.push_back(string_pos);
2186
            //     (*dest_nested_null_map).push_back(false);
2187
            //     dest_pos++;
2188
            // }
2189
0
        } else {
2190
0
            for (size_t i = 0, utf8_char_len = 0; i < str_ref.size; i += utf8_char_len) {
2191
0
                utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str_ref.data[i]];
2192
2193
0
                string_pos += utf8_char_len;
2194
0
                column_string_offsets.push_back(string_pos);
2195
0
                dest_pos++;
2196
0
            }
2197
0
        }
2198
0
    }
2199
};
2200
2201
// Selects the overload of `count_substrings` implemented by FunctionCountSubString:
// TWO_ARGUMENTS takes (String, String); THREE_ARGUMENTS takes (String, String, Int32).
enum class FunctionCountSubStringType { TWO_ARGUMENTS, THREE_ARGUMENTS };
2202
2203
template <FunctionCountSubStringType type>
2204
class FunctionCountSubString : public IFunction {
2205
public:
2206
    static constexpr auto name = "count_substrings";
2207
    static constexpr auto arg_count = (type == FunctionCountSubStringType::TWO_ARGUMENTS) ? 2 : 3;
2208
2209
235
    // Factory: builds a shared instance of this instantiation.
    static FunctionPtr create() {
        return std::make_shared<FunctionCountSubString>();
    }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE6createEv
Line
Count
Source
2209
51
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
_ZN5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE6createEv
Line
Count
Source
2209
184
    static FunctionPtr create() { return std::make_shared<FunctionCountSubString>(); }
2210
    using NullMapType = PaddedPODArray<UInt8>;
2211
2212
0
    // Returns the function name held in `name`.
    String get_name() const override {
        return name;
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8get_nameB5cxx11Ev
2213
2214
0
    // Returns `arg_count`, which is fixed by the template parameter.
    size_t get_number_of_arguments() const override {
        return arg_count;
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE23get_number_of_argumentsEv
2215
2216
219
    // The result type is Int32 regardless of the argument types.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        DataTypePtr count_type = std::make_shared<DataTypeInt32>();
        return count_type;
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2216
43
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2217
43
        return std::make_shared<DataTypeInt32>();
2218
43
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2216
176
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2217
176
        return std::make_shared<DataTypeInt32>();
2218
176
    }
2219
2220
14
    // Argument signature of this overload: (String, String), plus a trailing Int32 for the
    // three-argument form.
    DataTypes get_variadic_argument_types_impl() const override {
        DataTypes signature {std::make_shared<DataTypeString>(),
                             std::make_shared<DataTypeString>()};
        if constexpr (type != FunctionCountSubStringType::TWO_ARGUMENTS) {
            signature.push_back(std::make_shared<DataTypeInt32>());
        }
        return signature;
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE32get_variadic_argument_types_implEv
Line
Count
Source
2220
7
    DataTypes get_variadic_argument_types_impl() const override {
2221
7
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2222
7
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
2223
        } else {
2224
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
2225
                    std::make_shared<DataTypeInt32>()};
2226
        }
2227
7
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE32get_variadic_argument_types_implEv
Line
Count
Source
2220
7
    DataTypes get_variadic_argument_types_impl() const override {
2221
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2222
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
2223
7
        } else {
2224
7
            return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
2225
7
                    std::make_shared<DataTypeInt32>()};
2226
7
        }
2227
7
    }
2228
2229
221
    // Always reports the function as variadic.
    bool is_variadic() const override {
        return true;
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE11is_variadicEv
Line
Count
Source
2229
44
    bool is_variadic() const override { return true; }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE11is_variadicEv
Line
Count
Source
2229
177
    bool is_variadic() const override { return true; }
2230
2231
    // Unpacks the (possibly constant) argument columns, dispatches to the `_execute`
    // overload matching the constness of each argument, and stores the resulting Int32
    // column at position `result` of `block`.
    Status execute_impl(FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        DCHECK(arg_count);

        // Per-argument: the unwrapped column and whether it was a constant column.
        std::array<bool, arg_count> is_const_arg {};
        std::array<ColumnPtr, arg_count> unpacked_args;
        for (int idx = 0; idx < arg_count; ++idx) {
            std::tie(unpacked_args[idx], is_const_arg[idx]) =
                    unpack_if_const(block.get_by_position(arguments[idx]).column);
        }

        auto counts_column = ColumnInt32::create(input_rows_count);
        auto& counts = counts_column->get_data();

        const auto& haystack_column = assert_cast<const ColumnString&>(*unpacked_args[0]);
        const auto& needle_column = assert_cast<const ColumnString&>(*unpacked_args[1]);

        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
            std::visit(
                    [&](auto haystack_is_const, auto needle_is_const) {
                        _execute<haystack_is_const, needle_is_const>(
                                haystack_column, needle_column, counts, input_rows_count);
                    },
                    make_bool_variant(is_const_arg[0]), make_bool_variant(is_const_arg[1]));
        } else {
            // Third argument: Int32 start position (interpretation lives in _execute).
            const auto& start_column = assert_cast<const ColumnInt32&>(*unpacked_args[2]);
            std::visit(
                    [&](auto haystack_is_const, auto needle_is_const, auto start_is_const) {
                        _execute<haystack_is_const, needle_is_const, start_is_const>(
                                haystack_column, needle_column, start_column, counts,
                                input_rows_count);
                    },
                    make_bool_variant(is_const_arg[0]), make_bool_variant(is_const_arg[1]),
                    make_bool_variant(is_const_arg[2]));
        }

        block.replace_by_position(result, std::move(counts_column));
        return Status::OK();
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2232
34
                        uint32_t result, size_t input_rows_count) const override {
2233
34
        DCHECK(arg_count);
2234
34
        bool col_const[arg_count];
2235
34
        ColumnPtr argument_columns[arg_count];
2236
102
        for (int i = 0; i < arg_count; ++i) {
2237
68
            std::tie(argument_columns[i], col_const[i]) =
2238
68
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2239
68
        }
2240
2241
34
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
2242
34
        auto& dest_column_data = dest_column_ptr->get_data();
2243
2244
34
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2245
34
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2246
34
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2247
34
            std::visit(
2248
34
                    [&](auto str_const, auto pattern_const) {
2249
34
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2250
34
                                                           dest_column_data, input_rows_count);
2251
34
                    },
2252
34
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
2253
        } else {
2254
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2255
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2256
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
2257
            std::visit(
2258
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
                        _execute<str_const, pattern_const, start_pos_const>(
2260
                                src_column_string, pattern_column, start_pos_column,
2261
                                dest_column_data, input_rows_count);
2262
                    },
2263
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
2264
                    make_bool_variant(col_const[2]));
2265
        }
2266
2267
34
        block.replace_by_position(result, std::move(dest_column_ptr));
2268
34
        return Status::OK();
2269
34
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2232
155
                        uint32_t result, size_t input_rows_count) const override {
2233
155
        DCHECK(arg_count);
2234
155
        bool col_const[arg_count];
2235
155
        ColumnPtr argument_columns[arg_count];
2236
620
        for (int i = 0; i < arg_count; ++i) {
2237
465
            std::tie(argument_columns[i], col_const[i]) =
2238
465
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2239
465
        }
2240
2241
155
        auto dest_column_ptr = ColumnInt32::create(input_rows_count);
2242
155
        auto& dest_column_data = dest_column_ptr->get_data();
2243
2244
        if constexpr (type == FunctionCountSubStringType::TWO_ARGUMENTS) {
2245
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2246
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2247
            std::visit(
2248
                    [&](auto str_const, auto pattern_const) {
2249
                        _execute<str_const, pattern_const>(src_column_string, pattern_column,
2250
                                                           dest_column_data, input_rows_count);
2251
                    },
2252
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]));
2253
155
        } else {
2254
155
            const auto& src_column_string = assert_cast<const ColumnString&>(*argument_columns[0]);
2255
155
            const auto& pattern_column = assert_cast<const ColumnString&>(*argument_columns[1]);
2256
155
            const auto& start_pos_column = assert_cast<const ColumnInt32&>(*argument_columns[2]);
2257
155
            std::visit(
2258
155
                    [&](auto str_const, auto pattern_const, auto start_pos_const) {
2259
155
                        _execute<str_const, pattern_const, start_pos_const>(
2260
155
                                src_column_string, pattern_column, start_pos_column,
2261
155
                                dest_column_data, input_rows_count);
2262
155
                    },
2263
155
                    make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
2264
155
                    make_bool_variant(col_const[2]));
2265
155
        }
2266
2267
155
        block.replace_by_position(result, std::move(dest_column_ptr));
2268
155
        return Status::OK();
2269
155
    }
2270
2271
private:
2272
    template <bool src_const, bool pattern_const>
2273
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
2274
34
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2275
81
        for (size_t i = 0; i < size; i++) {
2276
47
            const StringRef str_ref =
2277
47
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2278
2279
47
            const StringRef pattern_ref =
2280
47
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2281
47
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2282
47
        }
2283
34
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2274
12
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2275
37
        for (size_t i = 0; i < size; i++) {
2276
25
            const StringRef str_ref =
2277
25
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2278
2279
25
            const StringRef pattern_ref =
2280
25
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2281
25
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2282
25
        }
2283
12
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb0ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2274
11
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2275
22
        for (size_t i = 0; i < size; i++) {
2276
11
            const StringRef str_ref =
2277
11
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2278
2279
11
            const StringRef pattern_ref =
2280
11
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2281
11
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2282
11
        }
2283
11
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb0EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2274
11
                  ColumnInt32::Container& dest_column_data, size_t size) const {
2275
22
        for (size_t i = 0; i < size; i++) {
2276
11
            const StringRef str_ref =
2277
11
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2278
2279
11
            const StringRef pattern_ref =
2280
11
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2281
11
            dest_column_data[i] = find_str_count(str_ref, pattern_ref);
2282
11
        }
2283
11
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8_executeILb1ELb1EEEvRKNS_9ColumnStrIjEES7_RNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
2284
2285
    template <bool src_const, bool pattern_const, bool start_pos_const>
2286
    void _execute(const ColumnString& src_column_string, const ColumnString& pattern_column,
2287
                  const ColumnInt32& start_pos_column, ColumnInt32::Container& dest_column_data,
2288
155
                  size_t size) const {
2289
334
        for (size_t i = 0; i < size; i++) {
2290
179
            const StringRef str_ref =
2291
179
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
179
            const StringRef pattern_ref =
2293
179
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
179
            int32_t start_pos =
2296
179
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
179
            const char* p = str_ref.begin();
2299
179
            const char* end = str_ref.end();
2300
179
            int char_size = 0;
2301
1.22k
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
1.04k
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
1.04k
            }
2304
179
            const auto start_byte_len = p - str_ref.begin();
2305
2306
179
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
115
                dest_column_data[i] = 0;
2308
115
            } else {
2309
64
                dest_column_data[i] =
2310
64
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
64
            }
2312
179
        }
2313
155
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
23
                  size_t size) const {
2289
70
        for (size_t i = 0; i < size; i++) {
2290
47
            const StringRef str_ref =
2291
47
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
47
            const StringRef pattern_ref =
2293
47
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
47
            int32_t start_pos =
2296
47
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
47
            const char* p = str_ref.begin();
2299
47
            const char* end = str_ref.end();
2300
47
            int char_size = 0;
2301
316
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
269
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
269
            }
2304
47
            const auto start_byte_len = p - str_ref.begin();
2305
2306
47
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
31
                dest_column_data[i] = 0;
2308
31
            } else {
2309
16
                dest_column_data[i] =
2310
16
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
16
            }
2312
47
        }
2313
23
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
22
                  size_t size) const {
2289
44
        for (size_t i = 0; i < size; i++) {
2290
22
            const StringRef str_ref =
2291
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
22
            const StringRef pattern_ref =
2293
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
22
            int32_t start_pos =
2296
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
22
            const char* p = str_ref.begin();
2299
22
            const char* end = str_ref.end();
2300
22
            int char_size = 0;
2301
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
129
            }
2304
22
            const auto start_byte_len = p - str_ref.begin();
2305
2306
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
14
                dest_column_data[i] = 0;
2308
14
            } else {
2309
8
                dest_column_data[i] =
2310
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
8
            }
2312
22
        }
2313
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
22
                  size_t size) const {
2289
44
        for (size_t i = 0; i < size; i++) {
2290
22
            const StringRef str_ref =
2291
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
22
            const StringRef pattern_ref =
2293
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
22
            int32_t start_pos =
2296
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
22
            const char* p = str_ref.begin();
2299
22
            const char* end = str_ref.end();
2300
22
            int char_size = 0;
2301
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
129
            }
2304
22
            const auto start_byte_len = p - str_ref.begin();
2305
2306
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
14
                dest_column_data[i] = 0;
2308
14
            } else {
2309
8
                dest_column_data[i] =
2310
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
8
            }
2312
22
        }
2313
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb0ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
22
                  size_t size) const {
2289
44
        for (size_t i = 0; i < size; i++) {
2290
22
            const StringRef str_ref =
2291
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
22
            const StringRef pattern_ref =
2293
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
22
            int32_t start_pos =
2296
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
22
            const char* p = str_ref.begin();
2299
22
            const char* end = str_ref.end();
2300
22
            int char_size = 0;
2301
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
129
            }
2304
22
            const auto start_byte_len = p - str_ref.begin();
2305
2306
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
14
                dest_column_data[i] = 0;
2308
14
            } else {
2309
8
                dest_column_data[i] =
2310
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
8
            }
2312
22
        }
2313
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
22
                  size_t size) const {
2289
44
        for (size_t i = 0; i < size; i++) {
2290
22
            const StringRef str_ref =
2291
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
22
            const StringRef pattern_ref =
2293
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
22
            int32_t start_pos =
2296
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
22
            const char* p = str_ref.begin();
2299
22
            const char* end = str_ref.end();
2300
22
            int char_size = 0;
2301
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
129
            }
2304
22
            const auto start_byte_len = p - str_ref.begin();
2305
2306
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
14
                dest_column_data[i] = 0;
2308
14
            } else {
2309
8
                dest_column_data[i] =
2310
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
8
            }
2312
22
        }
2313
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb0ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
22
                  size_t size) const {
2289
44
        for (size_t i = 0; i < size; i++) {
2290
22
            const StringRef str_ref =
2291
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
22
            const StringRef pattern_ref =
2293
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
22
            int32_t start_pos =
2296
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
22
            const char* p = str_ref.begin();
2299
22
            const char* end = str_ref.end();
2300
22
            int char_size = 0;
2301
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
129
            }
2304
22
            const auto start_byte_len = p - str_ref.begin();
2305
2306
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
14
                dest_column_data[i] = 0;
2308
14
            } else {
2309
8
                dest_column_data[i] =
2310
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
8
            }
2312
22
        }
2313
22
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb0EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
Line
Count
Source
2288
22
                  size_t size) const {
2289
44
        for (size_t i = 0; i < size; i++) {
2290
22
            const StringRef str_ref =
2291
22
                    src_column_string.get_data_at(index_check_const<src_const>(i));
2292
22
            const StringRef pattern_ref =
2293
22
                    pattern_column.get_data_at(index_check_const<pattern_const>(i));
2294
            // 1-based index
2295
22
            int32_t start_pos =
2296
22
                    start_pos_column.get_element(index_check_const<start_pos_const>(i)) - 1;
2297
2298
22
            const char* p = str_ref.begin();
2299
22
            const char* end = str_ref.end();
2300
22
            int char_size = 0;
2301
151
            for (size_t j = 0; j < start_pos && p < end; ++j, p += char_size) {
2302
129
                char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
2303
129
            }
2304
22
            const auto start_byte_len = p - str_ref.begin();
2305
2306
22
            if (start_pos < 0 || start_byte_len >= str_ref.size) {
2307
14
                dest_column_data[i] = 0;
2308
14
            } else {
2309
8
                dest_column_data[i] =
2310
8
                        find_str_count(str_ref.substring(start_byte_len), pattern_ref);
2311
8
            }
2312
22
        }
2313
22
    }
Unexecuted instantiation: _ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8_executeILb1ELb1ELb1EEEvRKNS_9ColumnStrIjEES7_RKNS_12ColumnVectorILNS_13PrimitiveTypeE5EEERNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEEm
2314
2315
208
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
2316
208
        size_t old_size = pos;
2317
208
        size_t str_size = str_ref.size;
2318
1.15k
        while (pos < str_size &&
2319
1.15k
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
2320
1.06k
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
2321
948
            pos++;
2322
948
        }
2323
208
        return pos - old_size;
2324
208
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE8find_posEmNS_9StringRefES3_
Line
Count
Source
2315
56
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
2316
56
        size_t old_size = pos;
2317
56
        size_t str_size = str_ref.size;
2318
372
        while (pos < str_size &&
2319
372
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
2320
344
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
2321
316
            pos++;
2322
316
        }
2323
56
        return pos - old_size;
2324
56
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE8find_posEmNS_9StringRefES3_
Line
Count
Source
2315
152
    size_t find_pos(size_t pos, const StringRef str_ref, const StringRef pattern_ref) const {
2316
152
        size_t old_size = pos;
2317
152
        size_t str_size = str_ref.size;
2318
784
        while (pos < str_size &&
2319
784
               memcmp_small_allow_overflow15((const uint8_t*)str_ref.data + pos,
2320
720
                                             (const uint8_t*)pattern_ref.data, pattern_ref.size)) {
2321
632
            pos++;
2322
632
        }
2323
152
        return pos - old_size;
2324
152
    }
2325
2326
111
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
2327
111
        int count = 0;
2328
111
        if (str_ref.size == 0 || pattern_ref.size == 0) {
2329
19
            return 0;
2330
92
        } else {
2331
208
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2332
208
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
2333
208
                if (res_pos == (str_ref.size - str_pos)) {
2334
92
                    break; // not find
2335
92
                }
2336
116
                count++;
2337
116
                str_pos = str_pos + res_pos + pattern_ref.size;
2338
116
            }
2339
92
        }
2340
92
        return count;
2341
111
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE0EE14find_str_countENS_9StringRefES3_
Line
Count
Source
2326
47
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
2327
47
        int count = 0;
2328
47
        if (str_ref.size == 0 || pattern_ref.size == 0) {
2329
19
            return 0;
2330
28
        } else {
2331
56
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2332
56
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
2333
56
                if (res_pos == (str_ref.size - str_pos)) {
2334
28
                    break; // not find
2335
28
                }
2336
28
                count++;
2337
28
                str_pos = str_pos + res_pos + pattern_ref.size;
2338
28
            }
2339
28
        }
2340
28
        return count;
2341
47
    }
_ZNK5doris22FunctionCountSubStringILNS_26FunctionCountSubStringTypeE1EE14find_str_countENS_9StringRefES3_
Line
Count
Source
2326
64
    int find_str_count(const StringRef str_ref, StringRef pattern_ref) const {
2327
64
        int count = 0;
2328
64
        if (str_ref.size == 0 || pattern_ref.size == 0) {
2329
0
            return 0;
2330
64
        } else {
2331
152
            for (size_t str_pos = 0; str_pos <= str_ref.size;) {
2332
152
                const size_t res_pos = find_pos(str_pos, str_ref, pattern_ref);
2333
152
                if (res_pos == (str_ref.size - str_pos)) {
2334
64
                    break; // not find
2335
64
                }
2336
88
                count++;
2337
88
                str_pos = str_pos + res_pos + pattern_ref.size;
2338
88
            }
2339
64
        }
2340
64
        return count;
2341
64
    }
2342
};
2343
2344
struct SM3Sum {
2345
    static constexpr auto name = "sm3sum";
2346
    using ObjectData = SM3Digest;
2347
};
2348
2349
struct MD5Sum {
2350
    static constexpr auto name = "md5sum";
2351
    using ObjectData = Md5Digest;
2352
};
2353
2354
template <typename Impl>
2355
class FunctionStringDigestMulti : public IFunction {
2356
public:
2357
    static constexpr auto name = Impl::name;
2358
220
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
_ZN5doris25FunctionStringDigestMultiINS_6SM3SumEE6createEv
Line
Count
Source
2358
110
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
_ZN5doris25FunctionStringDigestMultiINS_6MD5SumEE6createEv
Line
Count
Source
2358
110
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestMulti>(); }
2359
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE8get_nameB5cxx11Ev
2360
0
    size_t get_number_of_arguments() const override { return 0; }
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE23get_number_of_argumentsEv
2361
206
    bool is_variadic() const override { return true; }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE11is_variadicEv
Line
Count
Source
2361
103
    bool is_variadic() const override { return true; }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE11is_variadicEv
Line
Count
Source
2361
103
    bool is_variadic() const override { return true; }
2362
2363
204
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2364
204
        return std::make_shared<DataTypeString>();
2365
204
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2363
102
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2364
102
        return std::make_shared<DataTypeString>();
2365
102
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2363
102
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2364
102
        return std::make_shared<DataTypeString>();
2365
102
    }
2366
2367
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2368
160
                        uint32_t result, size_t input_rows_count) const override {
2369
160
        DCHECK_GE(arguments.size(), 1);
2370
2371
160
        auto res = ColumnString::create();
2372
160
        auto& res_data = res->get_chars();
2373
160
        auto& res_offset = res->get_offsets();
2374
160
        res_offset.resize(input_rows_count);
2375
2376
160
        std::vector<ColumnPtr> argument_columns(arguments.size());
2377
160
        std::vector<uint8_t> is_const(arguments.size(), 0);
2378
536
        for (size_t i = 0; i < arguments.size(); ++i) {
2379
376
            std::tie(argument_columns[i], is_const[i]) =
2380
376
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2381
376
        }
2382
2383
160
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
2384
80
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
2385
80
                                         res_data, res_offset);
2386
80
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
2387
80
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
2388
80
                                            res_data, res_offset);
2389
80
        } else {
2390
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2391
0
                                        argument_columns[0]->get_name(), get_name());
2392
0
        }
2393
2394
160
        block.replace_by_position(result, std::move(res));
2395
160
        return Status::OK();
2396
160
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2368
80
                        uint32_t result, size_t input_rows_count) const override {
2369
80
        DCHECK_GE(arguments.size(), 1);
2370
2371
80
        auto res = ColumnString::create();
2372
80
        auto& res_data = res->get_chars();
2373
80
        auto& res_offset = res->get_offsets();
2374
80
        res_offset.resize(input_rows_count);
2375
2376
80
        std::vector<ColumnPtr> argument_columns(arguments.size());
2377
80
        std::vector<uint8_t> is_const(arguments.size(), 0);
2378
268
        for (size_t i = 0; i < arguments.size(); ++i) {
2379
188
            std::tie(argument_columns[i], is_const[i]) =
2380
188
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2381
188
        }
2382
2383
80
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
2384
40
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
2385
40
                                         res_data, res_offset);
2386
40
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
2387
40
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
2388
40
                                            res_data, res_offset);
2389
40
        } else {
2390
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2391
0
                                        argument_columns[0]->get_name(), get_name());
2392
0
        }
2393
2394
80
        block.replace_by_position(result, std::move(res));
2395
80
        return Status::OK();
2396
80
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2368
80
                        uint32_t result, size_t input_rows_count) const override {
2369
80
        DCHECK_GE(arguments.size(), 1);
2370
2371
80
        auto res = ColumnString::create();
2372
80
        auto& res_data = res->get_chars();
2373
80
        auto& res_offset = res->get_offsets();
2374
80
        res_offset.resize(input_rows_count);
2375
2376
80
        std::vector<ColumnPtr> argument_columns(arguments.size());
2377
80
        std::vector<uint8_t> is_const(arguments.size(), 0);
2378
268
        for (size_t i = 0; i < arguments.size(); ++i) {
2379
188
            std::tie(argument_columns[i], is_const[i]) =
2380
188
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2381
188
        }
2382
2383
80
        if (check_and_get_column<ColumnString>(argument_columns[0].get())) {
2384
40
            vector_execute<ColumnString>(block, input_rows_count, argument_columns, is_const,
2385
40
                                         res_data, res_offset);
2386
40
        } else if (check_and_get_column<ColumnVarbinary>(argument_columns[0].get())) {
2387
40
            vector_execute<ColumnVarbinary>(block, input_rows_count, argument_columns, is_const,
2388
40
                                            res_data, res_offset);
2389
40
        } else {
2390
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2391
0
                                        argument_columns[0]->get_name(), get_name());
2392
0
        }
2393
2394
80
        block.replace_by_position(result, std::move(res));
2395
80
        return Status::OK();
2396
80
    }
2397
2398
private:
2399
    template <typename ColumnType>
2400
    void vector_execute(Block& block, size_t input_rows_count,
2401
                        const std::vector<ColumnPtr>& argument_columns,
2402
                        const std::vector<uint8_t>& is_const, ColumnString::Chars& res_data,
2403
160
                        ColumnString::Offsets& res_offset) const {
2404
160
        using ObjectData = typename Impl::ObjectData;
2405
372
        for (size_t i = 0; i < input_rows_count; ++i) {
2406
212
            ObjectData digest;
2407
676
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2408
464
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2409
464
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2410
464
                if (data_ref.size < 1) {
2411
120
                    continue;
2412
120
                }
2413
344
                digest.update(data_ref.data, data_ref.size);
2414
344
            }
2415
212
            digest.digest();
2416
212
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2417
212
                                        i, res_data, res_offset);
2418
212
        }
2419
160
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE
Line
Count
Source
2403
40
                        ColumnString::Offsets& res_offset) const {
2404
40
        using ObjectData = typename Impl::ObjectData;
2405
93
        for (size_t i = 0; i < input_rows_count; ++i) {
2406
53
            ObjectData digest;
2407
169
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2408
116
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2409
116
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2410
116
                if (data_ref.size < 1) {
2411
30
                    continue;
2412
30
                }
2413
86
                digest.update(data_ref.data, data_ref.size);
2414
86
            }
2415
53
            digest.digest();
2416
53
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2417
53
                                        i, res_data, res_offset);
2418
53
        }
2419
40
    }
_ZNK5doris25FunctionStringDigestMultiINS_6SM3SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE
Line
Count
Source
2403
40
                        ColumnString::Offsets& res_offset) const {
2404
40
        using ObjectData = typename Impl::ObjectData;
2405
93
        for (size_t i = 0; i < input_rows_count; ++i) {
2406
53
            ObjectData digest;
2407
169
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2408
116
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2409
116
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2410
116
                if (data_ref.size < 1) {
2411
30
                    continue;
2412
30
                }
2413
86
                digest.update(data_ref.data, data_ref.size);
2414
86
            }
2415
53
            digest.digest();
2416
53
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2417
53
                                        i, res_data, res_offset);
2418
53
        }
2419
40
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_9ColumnStrIjEEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrISA_EESaISD_EERKS8_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSM_IjLm4096ESP_Lm16ELm15EEE
Line
Count
Source
2403
40
                        ColumnString::Offsets& res_offset) const {
2404
40
        using ObjectData = typename Impl::ObjectData;
2405
93
        for (size_t i = 0; i < input_rows_count; ++i) {
2406
53
            ObjectData digest;
2407
169
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2408
116
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2409
116
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2410
116
                if (data_ref.size < 1) {
2411
30
                    continue;
2412
30
                }
2413
86
                digest.update(data_ref.data, data_ref.size);
2414
86
            }
2415
53
            digest.digest();
2416
53
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2417
53
                                        i, res_data, res_offset);
2418
53
        }
2419
40
    }
_ZNK5doris25FunctionStringDigestMultiINS_6MD5SumEE14vector_executeINS_15ColumnVarbinaryEEEvRNS_5BlockEmRKSt6vectorINS_3COWINS_7IColumnEE13immutable_ptrIS9_EESaISC_EERKS7_IhSaIhEERNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNSL_IjLm4096ESO_Lm16ELm15EEE
Line
Count
Source
2403
40
                        ColumnString::Offsets& res_offset) const {
2404
40
        using ObjectData = typename Impl::ObjectData;
2405
93
        for (size_t i = 0; i < input_rows_count; ++i) {
2406
53
            ObjectData digest;
2407
169
            for (size_t j = 0; j < argument_columns.size(); ++j) {
2408
116
                const auto* col = assert_cast<const ColumnType*>(argument_columns[j].get());
2409
116
                StringRef data_ref = col->get_data_at(is_const[j] ? 0 : i);
2410
116
                if (data_ref.size < 1) {
2411
30
                    continue;
2412
30
                }
2413
86
                digest.update(data_ref.data, data_ref.size);
2414
86
            }
2415
53
            digest.digest();
2416
53
            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
2417
53
                                        i, res_data, res_offset);
2418
53
        }
2419
40
    }
2420
};
2421
2422
class FunctionStringDigestSHA1 : public IFunction {
2423
public:
2424
    static constexpr auto name = "sha1";
2425
20
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA1>(); }
2426
0
    String get_name() const override { return name; }
2427
0
    size_t get_number_of_arguments() const override { return 1; }
2428
13
    bool is_variadic() const override { return true; }
2429
2430
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2431
12
        return std::make_shared<DataTypeString>();
2432
12
    }
2433
2434
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2435
10
                        uint32_t result, size_t input_rows_count) const override {
2436
10
        DCHECK_EQ(arguments.size(), 1);
2437
10
        ColumnPtr data_col = block.get_by_position(arguments[0]).column;
2438
2439
10
        auto res_col = ColumnString::create();
2440
10
        auto& res_data = res_col->get_chars();
2441
10
        auto& res_offset = res_col->get_offsets();
2442
10
        res_offset.resize(input_rows_count);
2443
10
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
2444
5
            vector_execute(str_col, input_rows_count, res_data, res_offset);
2445
5
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
2446
5
            vector_execute(vb_col, input_rows_count, res_data, res_offset);
2447
5
        } else {
2448
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
2449
0
                                        data_col->get_name(), get_name());
2450
0
        }
2451
2452
10
        block.replace_by_position(result, std::move(res_col));
2453
10
        return Status::OK();
2454
10
    }
2455
2456
private:
2457
    template <typename ColumnType>
2458
    void vector_execute(const ColumnType* col, size_t input_rows_count,
2459
10
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2460
10
        SHA1Digest digest;
2461
28
        for (size_t i = 0; i < input_rows_count; ++i) {
2462
18
            StringRef data_ref = col->get_data_at(i);
2463
18
            digest.reset(data_ref.data, data_ref.size);
2464
18
            std::string_view ans = digest.digest();
2465
2466
18
            StringOP::push_value_string(ans, i, res_data, res_offset);
2467
18
        }
2468
10
    }
_ZNK5doris24FunctionStringDigestSHA114vector_executeINS_9ColumnStrIjEEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
Line
Count
Source
2459
5
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2460
5
        SHA1Digest digest;
2461
14
        for (size_t i = 0; i < input_rows_count; ++i) {
2462
9
            StringRef data_ref = col->get_data_at(i);
2463
9
            digest.reset(data_ref.data, data_ref.size);
2464
9
            std::string_view ans = digest.digest();
2465
2466
9
            StringOP::push_value_string(ans, i, res_data, res_offset);
2467
9
        }
2468
5
    }
_ZNK5doris24FunctionStringDigestSHA114vector_executeINS_15ColumnVarbinaryEEEvPKT_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS6_IjLm4096ES9_Lm16ELm15EEE
Line
Count
Source
2459
5
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2460
5
        SHA1Digest digest;
2461
14
        for (size_t i = 0; i < input_rows_count; ++i) {
2462
9
            StringRef data_ref = col->get_data_at(i);
2463
9
            digest.reset(data_ref.data, data_ref.size);
2464
9
            std::string_view ans = digest.digest();
2465
2466
9
            StringOP::push_value_string(ans, i, res_data, res_offset);
2467
9
        }
2468
5
    }
2469
};
2470
2471
class FunctionStringDigestSHA2 : public IFunction {
2472
public:
2473
    static constexpr auto name = "sha2";
2474
8
    static FunctionPtr create() { return std::make_shared<FunctionStringDigestSHA2>(); }
2475
0
    String get_name() const override { return name; }
2476
0
    size_t get_number_of_arguments() const override { return 2; }
2477
1
    bool is_variadic() const override { return true; }
2478
2479
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2480
0
        return std::make_shared<DataTypeString>();
2481
0
    }
2482
2483
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2484
0
                        uint32_t result, size_t input_rows_count) const override {
2485
0
        DCHECK(!is_column_const(*block.get_by_position(arguments[0]).column));
2486
2487
0
        ColumnPtr data_col = block.get_by_position(arguments[0]).column;
2488
2489
0
        [[maybe_unused]] const auto& [right_column, right_const] =
2490
0
                unpack_if_const(block.get_by_position(arguments[1]).column);
2491
0
        auto digest_length = assert_cast<const ColumnInt32*>(right_column.get())->get_data()[0];
2492
2493
0
        auto res_col = ColumnString::create();
2494
0
        auto& res_data = res_col->get_chars();
2495
0
        auto& res_offset = res_col->get_offsets();
2496
0
        res_offset.resize(input_rows_count);
2497
2498
0
        if (digest_length == 224) {
2499
0
            execute_base<SHA224Digest>(data_col, input_rows_count, res_data, res_offset);
2500
0
        } else if (digest_length == 256) {
2501
0
            execute_base<SHA256Digest>(data_col, input_rows_count, res_data, res_offset);
2502
0
        } else if (digest_length == 384) {
2503
0
            execute_base<SHA384Digest>(data_col, input_rows_count, res_data, res_offset);
2504
0
        } else if (digest_length == 512) {
2505
0
            execute_base<SHA512Digest>(data_col, input_rows_count, res_data, res_offset);
2506
0
        } else {
2507
0
            return Status::InvalidArgument(
2508
0
                    "sha2's digest length only support 224/256/384/512 but meet {}", digest_length);
2509
0
        }
2510
2511
0
        block.replace_by_position(result, std::move(res_col));
2512
0
        return Status::OK();
2513
0
    }
2514
2515
private:
2516
    template <typename T>
2517
    void execute_base(ColumnPtr data_col, int input_rows_count, ColumnString::Chars& res_data,
2518
0
                      ColumnString::Offsets& res_offset) const {
2519
0
        if (const auto* str_col = check_and_get_column<ColumnString>(data_col.get())) {
2520
0
            vector_execute<T>(str_col, input_rows_count, res_data, res_offset);
2521
0
        } else if (const auto* vb_col = check_and_get_column<ColumnVarbinary>(data_col.get())) {
2522
0
            vector_execute<T>(vb_col, input_rows_count, res_data, res_offset);
2523
0
        } else {
2524
0
            throw Exception(ErrorCode::RUNTIME_ERROR,
2525
0
                            "Illegal column {} of argument of function {}", data_col->get_name(),
2526
0
                            get_name());
2527
0
        }
2528
0
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA224DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA256DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA384DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA212execute_baseINS_12SHA512DigestEEEvNS_3COWINS_7IColumnEE13immutable_ptrIS4_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
2529
2530
    template <typename DigestType, typename ColumnType>
2531
    void vector_execute(const ColumnType* col, size_t input_rows_count,
2532
0
                        ColumnString::Chars& res_data, ColumnString::Offsets& res_offset) const {
2533
0
        DigestType digest;
2534
0
        for (size_t i = 0; i < input_rows_count; ++i) {
2535
0
            StringRef data_ref = col->get_data_at(i);
2536
0
            digest.reset(data_ref.data, data_ref.size);
2537
0
            std::string_view ans = digest.digest();
2538
2539
0
            StringOP::push_value_string(ans, i, res_data, res_offset);
2540
0
        }
2541
0
    }
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA224DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA256DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA384DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_9ColumnStrIjEEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS8_IjLm4096ESB_Lm16ELm15EEE
Unexecuted instantiation: _ZNK5doris24FunctionStringDigestSHA214vector_executeINS_12SHA512DigestENS_15ColumnVarbinaryEEEvPKT0_mRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERNS7_IjLm4096ESA_Lm16ELm15EEE
2542
};
2543
2544
class FunctionExtractURLParameter : public IFunction {
2545
public:
2546
    static constexpr auto name = "extract_url_parameter";
2547
45
    static FunctionPtr create() { return std::make_shared<FunctionExtractURLParameter>(); }
2548
1
    String get_name() const override { return name; }
2549
37
    size_t get_number_of_arguments() const override { return 2; }
2550
2551
37
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2552
37
        return std::make_shared<DataTypeString>();
2553
37
    }
2554
2555
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2556
37
                        uint32_t result, size_t input_rows_count) const override {
2557
37
        auto col_url =
2558
37
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
2559
37
        auto col_parameter =
2560
37
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
2561
37
        auto url_col = assert_cast<const ColumnString*>(col_url.get());
2562
37
        auto parameter_col = assert_cast<const ColumnString*>(col_parameter.get());
2563
2564
37
        ColumnString::MutablePtr col_res = ColumnString::create();
2565
2566
85
        for (int i = 0; i < input_rows_count; ++i) {
2567
48
            auto source = url_col->get_data_at(i);
2568
48
            auto param = parameter_col->get_data_at(i);
2569
48
            auto res = extract_url(source, param);
2570
2571
48
            col_res->insert_data(res.data, res.size);
2572
48
        }
2573
2574
37
        block.replace_by_position(result, std::move(col_res));
2575
37
        return Status::OK();
2576
37
    }
2577
2578
private:
2579
48
    StringRef extract_url(StringRef url, StringRef parameter) const {
2580
48
        if (url.size == 0 || parameter.size == 0) {
2581
8
            return StringRef("", 0);
2582
8
        }
2583
40
        return UrlParser::extract_url(url, parameter);
2584
48
    }
2585
};
2586
2587
class FunctionStringParseUrl : public IFunction {
2588
public:
2589
    static constexpr auto name = "parse_url";
2590
101
    static FunctionPtr create() { return std::make_shared<FunctionStringParseUrl>(); }
2591
0
    String get_name() const override { return name; }
2592
0
    size_t get_number_of_arguments() const override { return 0; }
2593
94
    bool is_variadic() const override { return true; }
2594
2595
93
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2596
93
        return make_nullable(std::make_shared<DataTypeString>());
2597
93
    }
2598
2599
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2600
93
                        uint32_t result, size_t input_rows_count) const override {
2601
93
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
2602
93
        auto& null_map_data = null_map->get_data();
2603
93
        DCHECK_GE(3, arguments.size());
2604
93
        auto res = ColumnString::create();
2605
93
        auto& res_offsets = res->get_offsets();
2606
93
        auto& res_chars = res->get_chars();
2607
93
        res_offsets.resize(input_rows_count);
2608
2609
93
        size_t argument_size = arguments.size();
2610
93
        const bool has_key = argument_size == 3;
2611
2612
93
        std::vector<ColumnPtr> argument_columns(argument_size);
2613
93
        std::vector<UInt8> col_const(argument_size);
2614
308
        for (size_t i = 0; i < argument_size; ++i) {
2615
215
            std::tie(argument_columns[i], col_const[i]) =
2616
215
                    unpack_if_const(block.get_by_position(arguments[i]).column);
2617
215
        }
2618
2619
93
        const auto* url_col = assert_cast<const ColumnString*>(argument_columns[0].get());
2620
93
        const auto* part_col = assert_cast<const ColumnString*>(argument_columns[1].get());
2621
93
        const bool part_const = col_const[1];
2622
93
        std::vector<UrlParser::UrlPart> url_parts;
2623
93
        const int part_nums = part_const ? 1 : input_rows_count;
2624
2625
93
        url_parts.resize(part_nums);
2626
209
        for (int i = 0; i < part_nums; i++) {
2627
116
            StringRef part = part_col->get_data_at(i);
2628
116
            UrlParser::UrlPart url_part = UrlParser::get_url_part(part);
2629
116
            if (url_part == UrlParser::INVALID) {
2630
0
                return Status::RuntimeError("Invalid URL part: {}\n{}",
2631
0
                                            std::string(part.data, part.size),
2632
0
                                            "(Valid URL parts are 'PROTOCOL', 'HOST', "
2633
0
                                            "'PATH', 'REF', 'AUTHORITY', "
2634
0
                                            "'FILE', 'USERINFO', 'PORT' and 'QUERY')");
2635
0
            }
2636
116
            url_parts[i] = url_part;
2637
116
        }
2638
2639
93
        if (has_key) {
2640
29
            const bool url_const = col_const[0];
2641
29
            const bool key_const = col_const[2];
2642
29
            const auto* key_col = assert_cast<const ColumnString*>(argument_columns[2].get());
2643
29
            RETURN_IF_ERROR(std::visit(
2644
29
                    [&](auto url_const, auto part_const, auto key_const) {
2645
29
                        return vector_parse_key<url_const, part_const, key_const>(
2646
29
                                url_col, url_parts, key_col, input_rows_count, null_map_data,
2647
29
                                res_chars, res_offsets);
2648
29
                    },
2649
29
                    make_bool_variant(url_const), make_bool_variant(part_const),
2650
29
                    make_bool_variant(key_const)));
2651
64
        } else {
2652
64
            const bool url_const = col_const[0];
2653
64
            RETURN_IF_ERROR(std::visit(
2654
64
                    [&](auto url_const, auto part_const) {
2655
64
                        return vector_parse<url_const, part_const>(url_col, url_parts,
2656
64
                                                                   input_rows_count, null_map_data,
2657
64
                                                                   res_chars, res_offsets);
2658
64
                    },
2659
64
                    make_bool_variant(url_const), make_bool_variant(part_const)));
2660
64
        }
2661
93
        block.get_by_position(result).column =
2662
93
                ColumnNullable::create(std::move(res), std::move(null_map));
2663
93
        return Status::OK();
2664
93
    }
2665
    template <bool url_const, bool part_const>
2666
    static Status vector_parse(const ColumnString* url_col,
2667
                               std::vector<UrlParser::UrlPart>& url_parts, const int size,
2668
                               ColumnUInt8::Container& null_map_data,
2669
64
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2670
148
        for (size_t i = 0; i < size; ++i) {
2671
84
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2672
84
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2673
84
            StringRef parse_res;
2674
84
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2675
64
                if (parse_res.empty()) [[unlikely]] {
2676
4
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2677
4
                    continue;
2678
4
                }
2679
60
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2680
60
                                            res_chars, res_offsets);
2681
60
            } else {
2682
20
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2683
20
            }
2684
84
        }
2685
64
        return Status::OK();
2686
64
    }
_ZN5doris22FunctionStringParseUrl12vector_parseILb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2669
22
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2670
64
        for (size_t i = 0; i < size; ++i) {
2671
42
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2672
42
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2673
42
            StringRef parse_res;
2674
42
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2675
32
                if (parse_res.empty()) [[unlikely]] {
2676
2
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2677
2
                    continue;
2678
2
                }
2679
30
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2680
30
                                            res_chars, res_offsets);
2681
30
            } else {
2682
10
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2683
10
            }
2684
42
        }
2685
22
        return Status::OK();
2686
22
    }
_ZN5doris22FunctionStringParseUrl12vector_parseILb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2669
21
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2670
42
        for (size_t i = 0; i < size; ++i) {
2671
21
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2672
21
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2673
21
            StringRef parse_res;
2674
21
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2675
16
                if (parse_res.empty()) [[unlikely]] {
2676
1
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2677
1
                    continue;
2678
1
                }
2679
15
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2680
15
                                            res_chars, res_offsets);
2681
15
            } else {
2682
5
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2683
5
            }
2684
21
        }
2685
21
        return Status::OK();
2686
21
    }
_ZN5doris22FunctionStringParseUrl12vector_parseILb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2669
21
                               ColumnString::Chars& res_chars, ColumnString::Offsets& res_offsets) {
2670
42
        for (size_t i = 0; i < size; ++i) {
2671
21
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2672
21
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2673
21
            StringRef parse_res;
2674
21
            if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
2675
16
                if (parse_res.empty()) [[unlikely]] {
2676
1
                    StringOP::push_empty_string(i, res_chars, res_offsets);
2677
1
                    continue;
2678
1
                }
2679
15
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2680
15
                                            res_chars, res_offsets);
2681
15
            } else {
2682
5
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2683
5
            }
2684
21
        }
2685
21
        return Status::OK();
2686
21
    }
Unexecuted instantiation: _ZN5doris22FunctionStringParseUrl12vector_parseILb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EEiRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
2687
    template <bool url_const, bool part_const, bool key_const>
2688
    static Status vector_parse_key(const ColumnString* url_col,
2689
                                   std::vector<UrlParser::UrlPart>& url_parts,
2690
                                   const ColumnString* key_col, const int size,
2691
                                   ColumnUInt8::Container& null_map_data,
2692
                                   ColumnString::Chars& res_chars,
2693
29
                                   ColumnString::Offsets& res_offsets) {
2694
61
        for (size_t i = 0; i < size; ++i) {
2695
32
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
32
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
32
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
32
            StringRef parse_res;
2699
32
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
16
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
16
                                            res_chars, res_offsets);
2702
16
            } else {
2703
16
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
16
                continue;
2705
16
            }
2706
32
        }
2707
29
        return Status::OK();
2708
29
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
5
                                   ColumnString::Offsets& res_offsets) {
2694
13
        for (size_t i = 0; i < size; ++i) {
2695
8
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
8
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
8
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
8
            StringRef parse_res;
2699
8
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
4
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
4
                                            res_chars, res_offsets);
2702
4
            } else {
2703
4
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
4
                continue;
2705
4
            }
2706
8
        }
2707
5
        return Status::OK();
2708
5
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
4
                                   ColumnString::Offsets& res_offsets) {
2694
8
        for (size_t i = 0; i < size; ++i) {
2695
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
4
            StringRef parse_res;
2699
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
2
            } else {
2703
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
2
                continue;
2705
2
            }
2706
4
        }
2707
4
        return Status::OK();
2708
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
4
                                   ColumnString::Offsets& res_offsets) {
2694
8
        for (size_t i = 0; i < size; ++i) {
2695
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
4
            StringRef parse_res;
2699
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
2
            } else {
2703
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
2
                continue;
2705
2
            }
2706
4
        }
2707
4
        return Status::OK();
2708
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb0ELb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
4
                                   ColumnString::Offsets& res_offsets) {
2694
8
        for (size_t i = 0; i < size; ++i) {
2695
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
4
            StringRef parse_res;
2699
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
2
            } else {
2703
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
2
                continue;
2705
2
            }
2706
4
        }
2707
4
        return Status::OK();
2708
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb0ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
4
                                   ColumnString::Offsets& res_offsets) {
2694
8
        for (size_t i = 0; i < size; ++i) {
2695
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
4
            StringRef parse_res;
2699
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
2
            } else {
2703
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
2
                continue;
2705
2
            }
2706
4
        }
2707
4
        return Status::OK();
2708
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb0ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
4
                                   ColumnString::Offsets& res_offsets) {
2694
8
        for (size_t i = 0; i < size; ++i) {
2695
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
4
            StringRef parse_res;
2699
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
2
            } else {
2703
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
2
                continue;
2705
2
            }
2706
4
        }
2707
4
        return Status::OK();
2708
4
    }
_ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb1ELb0EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
Line
Count
Source
2693
4
                                   ColumnString::Offsets& res_offsets) {
2694
8
        for (size_t i = 0; i < size; ++i) {
2695
4
            UrlParser::UrlPart& url_part = url_parts[index_check_const<part_const>(i)];
2696
4
            StringRef url_val = url_col->get_data_at(index_check_const<url_const>(i));
2697
4
            StringRef url_key = key_col->get_data_at(index_check_const<key_const>(i));
2698
4
            StringRef parse_res;
2699
4
            if (UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res)) {
2700
2
                StringOP::push_value_string(std::string_view(parse_res.data, parse_res.size), i,
2701
2
                                            res_chars, res_offsets);
2702
2
            } else {
2703
2
                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
2704
2
                continue;
2705
2
            }
2706
4
        }
2707
4
        return Status::OK();
2708
4
    }
Unexecuted instantiation: _ZN5doris22FunctionStringParseUrl16vector_parse_keyILb1ELb1ELb1EEENS_6StatusEPKNS_9ColumnStrIjEERSt6vectorINS_9UrlParser7UrlPartESaIS9_EES6_iRNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESI_RNSD_IjLm4096ESG_Lm16ELm15EEE
2709
};
2710
2711
class FunctionUrlDecode : public IFunction {
2712
public:
2713
    static constexpr auto name = "url_decode";
2714
8
    static FunctionPtr create() { return std::make_shared<FunctionUrlDecode>(); }
2715
1
    String get_name() const override { return name; }
2716
0
    size_t get_number_of_arguments() const override { return 1; }
2717
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2718
0
        return std::make_shared<DataTypeString>();
2719
0
    }
2720
2721
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2722
0
                        uint32_t result, size_t input_rows_count) const override {
2723
0
        auto res = ColumnString::create();
2724
0
        res->get_offsets().reserve(input_rows_count);
2725
2726
0
        const auto* url_col =
2727
0
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
2728
2729
0
        std::string decoded_url;
2730
0
        for (size_t i = 0; i < input_rows_count; ++i) {
2731
0
            auto url = url_col->get_data_at(i);
2732
0
            if (!url_decode(url.to_string(), &decoded_url)) {
2733
0
                return Status::InternalError("Decode url failed");
2734
0
            }
2735
0
            res->insert_data(decoded_url.data(), decoded_url.size());
2736
0
            decoded_url.clear();
2737
0
        }
2738
2739
0
        block.get_by_position(result).column = std::move(res);
2740
0
        return Status::OK();
2741
0
    }
2742
};
2743
2744
class FunctionUrlEncode : public IFunction {
2745
public:
2746
    static constexpr auto name = "url_encode";
2747
12
    static FunctionPtr create() { return std::make_shared<FunctionUrlEncode>(); }
2748
1
    String get_name() const override { return name; }
2749
4
    size_t get_number_of_arguments() const override { return 1; }
2750
4
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2751
4
        return std::make_shared<DataTypeString>();
2752
4
    }
2753
2754
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2755
4
                        uint32_t result, size_t input_rows_count) const override {
2756
4
        auto res = ColumnString::create();
2757
4
        res->get_offsets().reserve(input_rows_count);
2758
2759
4
        const auto* url_col =
2760
4
                assert_cast<const ColumnString*>(block.get_by_position(arguments[0]).column.get());
2761
2762
4
        std::string encoded_url;
2763
10
        for (size_t i = 0; i < input_rows_count; ++i) {
2764
6
            auto url = url_col->get_data_at(i);
2765
6
            url_encode(url.to_string_view(), &encoded_url);
2766
6
            res->insert_data(encoded_url.data(), encoded_url.size());
2767
6
            encoded_url.clear();
2768
6
        }
2769
2770
4
        block.get_by_position(result).column = std::move(res);
2771
4
        return Status::OK();
2772
4
    }
2773
};
2774
2775
class FunctionRandomBytes : public IFunction {
2776
public:
2777
    static constexpr auto name = "random_bytes";
2778
8
    static FunctionPtr create() { return std::make_shared<FunctionRandomBytes>(); }
2779
1
    String get_name() const override { return name; }
2780
0
    size_t get_number_of_arguments() const override { return 1; }
2781
1
    bool is_variadic() const override { return false; }
2782
2783
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2784
0
        return std::make_shared<DataTypeString>();
2785
0
    }
2786
2787
0
    bool use_default_implementation_for_constants() const final { return false; }
2788
2789
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2790
0
                        uint32_t result, size_t input_rows_count) const override {
2791
0
        auto res = ColumnString::create();
2792
0
        auto& res_offsets = res->get_offsets();
2793
0
        auto& res_chars = res->get_chars();
2794
0
        res_offsets.resize(input_rows_count);
2795
2796
0
        auto [arg_col, arg_const] = unpack_if_const(block.get_by_position(arguments[0]).column);
2797
0
        const auto* length_col = assert_cast<const ColumnInt32*>(arg_col.get());
2798
2799
0
        if (arg_const) {
2800
0
            res_chars.reserve(input_rows_count * (length_col->get_element(0) + 2));
2801
0
        }
2802
2803
0
        std::vector<uint8_t, Allocator_<uint8_t>> random_bytes;
2804
0
        std::random_device rd;
2805
0
        std::mt19937 gen(rd());
2806
2807
0
        std::uniform_int_distribution<unsigned short> distribution(0, 255);
2808
0
        for (size_t i = 0; i < input_rows_count; ++i) {
2809
0
            size_t index = index_check_const(i, arg_const);
2810
0
            if (length_col->get_element(index) < 0) [[unlikely]] {
2811
0
                return Status::InvalidArgument("argument {} of function {} at row {} was invalid.",
2812
0
                                               length_col->get_element(index), name, index);
2813
0
            }
2814
0
            random_bytes.resize(length_col->get_element(index));
2815
2816
0
            for (auto& byte : random_bytes) {
2817
0
                byte = distribution(gen) & 0xFF;
2818
0
            }
2819
2820
0
            std::basic_ostringstream<char, std::char_traits<char>, Allocator_<char>> oss;
2821
0
            for (const auto& byte : random_bytes) {
2822
0
                oss << std::setw(2) << std::setfill('0') << std::hex << static_cast<int>(byte);
2823
0
            }
2824
2825
0
            StringOP::push_value_string("0x" + oss.str(), i, res_chars, res_offsets);
2826
0
            random_bytes.clear();
2827
0
        }
2828
2829
0
        block.get_by_position(result).column = std::move(res);
2830
2831
0
        return Status::OK();
2832
0
    }
2833
};
2834
2835
template <typename Impl>
2836
class FunctionMoneyFormat : public IFunction {
2837
public:
2838
    static constexpr auto name = "money_format";
2839
73
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE6createEv
Line
Count
Source
2839
9
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE6createEv
Line
Count
Source
2839
9
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE6createEv
Line
Count
Source
2839
9
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE6createEv
Line
Count
Source
2839
10
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE6createEv
Line
Count
Source
2839
8
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE6createEv
Line
Count
Source
2839
12
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE6createEv
Line
Count
Source
2839
8
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
_ZN5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE6createEv
Line
Count
Source
2839
8
    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat>(); }
2840
8
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE8get_nameB5cxx11Ev
Line
Count
Source
2840
1
    String get_name() const override { return name; }
2841
2842
8
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
8
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
8
        return std::make_shared<DataTypeString>();
2849
8
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2842
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
1
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
1
        return std::make_shared<DataTypeString>();
2849
1
    }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2842
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
1
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
1
        return std::make_shared<DataTypeString>();
2849
1
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
2842
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
1
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
1
        return std::make_shared<DataTypeString>();
2849
1
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
2842
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
1
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
1
        return std::make_shared<DataTypeString>();
2849
1
    }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
2842
4
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
2843
4
        if (arguments.size() != 1) {
2844
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
2845
0
                                   "Function {} requires exactly 1 argument", name);
2846
0
        }
2847
2848
4
        return std::make_shared<DataTypeString>();
2849
4
    }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
2850
56
    DataTypes get_variadic_argument_types_impl() const override {
2851
56
        return Impl::get_variadic_argument_types();
2852
56
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
7
    DataTypes get_variadic_argument_types_impl() const override {
2851
7
        return Impl::get_variadic_argument_types();
2852
7
    }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
7
    DataTypes get_variadic_argument_types_impl() const override {
2851
7
        return Impl::get_variadic_argument_types();
2852
7
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
7
    DataTypes get_variadic_argument_types_impl() const override {
2851
7
        return Impl::get_variadic_argument_types();
2852
7
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
7
    DataTypes get_variadic_argument_types_impl() const override {
2851
7
        return Impl::get_variadic_argument_types();
2852
7
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
7
    DataTypes get_variadic_argument_types_impl() const override {
2851
7
        return Impl::get_variadic_argument_types();
2852
7
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
7
    DataTypes get_variadic_argument_types_impl() const override {
2851
7
        return Impl::get_variadic_argument_types();
2852
7
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
7
    DataTypes get_variadic_argument_types_impl() const override {
2851
7
        return Impl::get_variadic_argument_types();
2852
7
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE32get_variadic_argument_types_implEv
Line
Count
Source
2850
7
    DataTypes get_variadic_argument_types_impl() const override {
2851
7
        return Impl::get_variadic_argument_types();
2852
7
    }
2853
8
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE23get_number_of_argumentsEv
Line
Count
Source
2853
1
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE23get_number_of_argumentsEv
Line
Count
Source
2853
1
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE23get_number_of_argumentsEv
Line
Count
Source
2853
1
    size_t get_number_of_arguments() const override { return 1; }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE23get_number_of_argumentsEv
Line
Count
Source
2853
1
    size_t get_number_of_arguments() const override { return 1; }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE23get_number_of_argumentsEv
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE23get_number_of_argumentsEv
Line
Count
Source
2853
4
    size_t get_number_of_arguments() const override { return 1; }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE23get_number_of_argumentsEv
2854
2855
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
2856
8
                        uint32_t result, size_t input_rows_count) const override {
2857
8
        auto res_column = ColumnString::create();
2858
8
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
8
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
8
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
8
        block.replace_by_position(result, std::move(res_column));
2865
8
        return Status::OK();
2866
8
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatDoubleImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
1
                        uint32_t result, size_t input_rows_count) const override {
2857
1
        auto res_column = ColumnString::create();
2858
1
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
1
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
1
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
1
        block.replace_by_position(result, std::move(res_column));
2865
1
        return Status::OK();
2866
1
    }
_ZNK5doris19FunctionMoneyFormatINS_20MoneyFormatInt64ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
1
                        uint32_t result, size_t input_rows_count) const override {
2857
1
        auto res_column = ColumnString::create();
2858
1
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
1
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
1
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
1
        block.replace_by_position(result, std::move(res_column));
2865
1
        return Status::OK();
2866
1
    }
_ZNK5doris19FunctionMoneyFormatINS_21MoneyFormatInt128ImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
1
                        uint32_t result, size_t input_rows_count) const override {
2857
1
        auto res_column = ColumnString::create();
2858
1
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
1
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
1
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
1
        block.replace_by_position(result, std::move(res_column));
2865
1
        return Status::OK();
2866
1
    }
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
2
                        uint32_t result, size_t input_rows_count) const override {
2857
2
        auto res_column = ColumnString::create();
2858
2
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
2
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
2
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
2
        block.replace_by_position(result, std::move(res_column));
2865
2
        return Status::OK();
2866
2
    }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
_ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
2856
3
                        uint32_t result, size_t input_rows_count) const override {
2857
3
        auto res_column = ColumnString::create();
2858
3
        ColumnPtr argument_column = block.get_by_position(arguments[0]).column;
2859
2860
3
        auto result_column = assert_cast<ColumnString*>(res_column.get());
2861
2862
3
        Impl::execute(context, result_column, argument_column, input_rows_count);
2863
2864
3
        block.replace_by_position(result, std::move(res_column));
2865
3
        return Status::OK();
2866
3
    }
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris19FunctionMoneyFormatINS_22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
2867
};
2868
2869
// ----------------------------------------------------------------------
2870
// SimpleItoaWithCommas()
2871
//    Description: converts an integer to a string.
2872
//    Puts commas every 3 spaces.
2873
//    Faster than printf("%d")?
2874
//
2875
//    Return value: string
2876
// ----------------------------------------------------------------------
2877
template <typename T>
2878
56
char* SimpleItoaWithCommas(T i, char* buffer, int32_t buffer_size) {
2879
56
    char* p = buffer + buffer_size;
2880
    // Need to use unsigned T instead of T to correctly handle
2881
56
    std::make_unsigned_t<T> n = i;
2882
56
    if (i < 0) {
2883
20
        n = 0 - n;
2884
20
    }
2885
56
    *--p = '0' + n % 10; // this case deals with the number "0"
2886
56
    n /= 10;
2887
142
    while (n) {
2888
127
        *--p = '0' + n % 10;
2889
127
        n /= 10;
2890
127
        if (n == 0) {
2891
25
            break;
2892
25
        }
2893
2894
102
        *--p = '0' + n % 10;
2895
102
        n /= 10;
2896
102
        if (n == 0) {
2897
16
            break;
2898
16
        }
2899
2900
86
        *--p = ',';
2901
86
        *--p = '0' + n % 10;
2902
86
        n /= 10;
2903
        // For this unrolling, we check if n == 0 in the main while loop
2904
86
    }
2905
56
    if (i < 0) {
2906
20
        *--p = '-';
2907
20
    }
2908
56
    return p;
2909
56
}
_ZN5doris20SimpleItoaWithCommasIlEEPcT_S1_i
Line
Count
Source
2878
27
char* SimpleItoaWithCommas(T i, char* buffer, int32_t buffer_size) {
2879
27
    char* p = buffer + buffer_size;
2880
    // Need to use unsigned T instead of T to correctly handle
2881
27
    std::make_unsigned_t<T> n = i;
2882
27
    if (i < 0) {
2883
10
        n = 0 - n;
2884
10
    }
2885
27
    *--p = '0' + n % 10; // this case deals with the number "0"
2886
27
    n /= 10;
2887
68
    while (n) {
2888
63
        *--p = '0' + n % 10;
2889
63
        n /= 10;
2890
63
        if (n == 0) {
2891
17
            break;
2892
17
        }
2893
2894
46
        *--p = '0' + n % 10;
2895
46
        n /= 10;
2896
46
        if (n == 0) {
2897
5
            break;
2898
5
        }
2899
2900
41
        *--p = ',';
2901
41
        *--p = '0' + n % 10;
2902
41
        n /= 10;
2903
        // For this unrolling, we check if n == 0 in the main while loop
2904
41
    }
2905
27
    if (i < 0) {
2906
10
        *--p = '-';
2907
10
    }
2908
27
    return p;
2909
27
}
_ZN5doris20SimpleItoaWithCommasInEEPcT_S1_i
Line
Count
Source
2878
29
char* SimpleItoaWithCommas(T i, char* buffer, int32_t buffer_size) {
2879
29
    char* p = buffer + buffer_size;
2880
    // Need to use unsigned T instead of T to correctly handle
2881
29
    std::make_unsigned_t<T> n = i;
2882
29
    if (i < 0) {
2883
10
        n = 0 - n;
2884
10
    }
2885
29
    *--p = '0' + n % 10; // this case deals with the number "0"
2886
29
    n /= 10;
2887
74
    while (n) {
2888
64
        *--p = '0' + n % 10;
2889
64
        n /= 10;
2890
64
        if (n == 0) {
2891
8
            break;
2892
8
        }
2893
2894
56
        *--p = '0' + n % 10;
2895
56
        n /= 10;
2896
56
        if (n == 0) {
2897
11
            break;
2898
11
        }
2899
2900
45
        *--p = ',';
2901
45
        *--p = '0' + n % 10;
2902
45
        n /= 10;
2903
        // For this unrolling, we check if n == 0 in the main while loop
2904
45
    }
2905
29
    if (i < 0) {
2906
10
        *--p = '-';
2907
10
    }
2908
29
    return p;
2909
29
}
2910
2911
namespace MoneyFormat {
2912
2913
0
constexpr size_t MAX_FORMAT_LEN_DEC32() {
2914
0
    // Decimal(9, 0)
2915
0
    // Double the size to avoid some unexpected bug.
2916
0
    return 2 * (1 + 9 + (9 / 3) + 3);
2917
0
}
2918
2919
0
constexpr size_t MAX_FORMAT_LEN_DEC64() {
2920
0
    // Decimal(18, 0)
2921
0
    // Double the size to avoid some unexpected bug.
2922
0
    return 2 * (1 + 18 + (18 / 3) + 3);
2923
0
}
2924
2925
0
constexpr size_t MAX_FORMAT_LEN_DEC128V2() {
2926
0
    // DecimalV2 has at most 27 digits
2927
0
    // Double the size to avoid some unexpected bug.
2928
0
    return 2 * (1 + 27 + (27 / 3) + 3);
2929
0
}
2930
2931
0
constexpr size_t MAX_FORMAT_LEN_DEC128V3() {
2932
0
    // Decimal(38, 0)
2933
0
    // Double the size to avoid some unexpected bug.
2934
0
    return 2 * (1 + 39 + (39 / 3) + 3);
2935
0
}
2936
2937
0
constexpr size_t MAX_FORMAT_LEN_INT64() {
2938
0
    // INT_MIN = -9223372036854775807
2939
0
    // Double the size to avoid some unexpected bug.
2940
0
    return 2 * (1 + 20 + (20 / 3) + 3);
2941
0
}
2942
2943
0
constexpr size_t MAX_FORMAT_LEN_INT128() {
2944
0
    // INT128_MIN = -170141183460469231731687303715884105728
2945
0
    return 2 * (1 + 39 + (39 / 3) + 3);
2946
0
}
2947
2948
template <typename T, size_t N>
2949
25
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
2950
25
    static_assert(std::is_integral<T>::value);
2951
25
    const bool is_negative = int_value < 0 || frac_value < 0;
2952
2953
    // do round to frac_part
2954
    // magic number 2: since we need to round frac_part to 2 digits
2955
25
    if (scale > 2) {
2956
19
        DCHECK(scale <= 38);
2957
        // do rounding, so we need to reserve 3 digits.
2958
19
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
2959
        // do devide first to avoid overflow
2960
        // after round frac_value will be positive by design.
2961
19
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
2962
19
        frac_value /= 10;
2963
19
    } else if (scale < 2) {
2964
6
        DCHECK(frac_value < 100);
2965
        // since scale <= 2, overflow is impossiable
2966
6
        frac_value = frac_value * common::exp10_i32(2 - scale);
2967
6
    }
2968
2969
25
    if (frac_value == 100) {
2970
3
        if (is_negative) {
2971
2
            int_value -= 1;
2972
2
        } else {
2973
1
            int_value += 1;
2974
1
        }
2975
3
        frac_value = 0;
2976
3
    }
2977
2978
25
    bool append_sign_manually = false;
2979
25
    if (is_negative && int_value == 0) {
2980
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
2981
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
2982
        // this is why we introduce argument append_sing_manually.
2983
2
        append_sign_manually = true;
2984
2
    }
2985
2986
25
    char local[N];
2987
25
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
2988
25
    const Int32 integer_str_len = N - (p - local);
2989
25
    const Int32 frac_str_len = 2;
2990
25
    const Int32 whole_decimal_str_len =
2991
25
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
2992
2993
25
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
2994
    // Modify a string passed via stringref
2995
25
    char* result_data = const_cast<char*>(result.data);
2996
2997
25
    if (append_sign_manually) {
2998
2
        memset(result_data, '-', 1);
2999
2
    }
3000
3001
25
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3002
25
    *(result_data + whole_decimal_str_len - 3) = '.';
3003
25
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3004
25
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3005
25
    return result;
3006
25
};
_ZN5doris11MoneyFormat15do_money_formatIlLm60EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
2949
3
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
2950
3
    static_assert(std::is_integral<T>::value);
2951
3
    const bool is_negative = int_value < 0 || frac_value < 0;
2952
2953
    // do round to frac_part
2954
    // magic number 2: since we need to round frac_part to 2 digits
2955
3
    if (scale > 2) {
2956
0
        DCHECK(scale <= 38);
2957
        // do rounding, so we need to reserve 3 digits.
2958
0
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
2959
        // do devide first to avoid overflow
2960
        // after round frac_value will be positive by design.
2961
0
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
2962
0
        frac_value /= 10;
2963
3
    } else if (scale < 2) {
2964
3
        DCHECK(frac_value < 100);
2965
        // since scale <= 2, overflow is impossiable
2966
3
        frac_value = frac_value * common::exp10_i32(2 - scale);
2967
3
    }
2968
2969
3
    if (frac_value == 100) {
2970
0
        if (is_negative) {
2971
0
            int_value -= 1;
2972
0
        } else {
2973
0
            int_value += 1;
2974
0
        }
2975
0
        frac_value = 0;
2976
0
    }
2977
2978
3
    bool append_sign_manually = false;
2979
3
    if (is_negative && int_value == 0) {
2980
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
2981
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
2982
        // this is why we introduce argument append_sing_manually.
2983
0
        append_sign_manually = true;
2984
0
    }
2985
2986
3
    char local[N];
2987
3
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
2988
3
    const Int32 integer_str_len = N - (p - local);
2989
3
    const Int32 frac_str_len = 2;
2990
3
    const Int32 whole_decimal_str_len =
2991
3
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
2992
2993
3
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
2994
    // Modify a string passed via stringref
2995
3
    char* result_data = const_cast<char*>(result.data);
2996
2997
3
    if (append_sign_manually) {
2998
0
        memset(result_data, '-', 1);
2999
0
    }
3000
3001
3
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3002
3
    *(result_data + whole_decimal_str_len - 3) = '.';
3003
3
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3004
3
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3005
3
    return result;
3006
3
};
_ZN5doris11MoneyFormat15do_money_formatInLm112EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
2949
3
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
2950
3
    static_assert(std::is_integral<T>::value);
2951
3
    const bool is_negative = int_value < 0 || frac_value < 0;
2952
2953
    // do round to frac_part
2954
    // magic number 2: since we need to round frac_part to 2 digits
2955
3
    if (scale > 2) {
2956
0
        DCHECK(scale <= 38);
2957
        // do rounding, so we need to reserve 3 digits.
2958
0
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
2959
        // do devide first to avoid overflow
2960
        // after round frac_value will be positive by design.
2961
0
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
2962
0
        frac_value /= 10;
2963
3
    } else if (scale < 2) {
2964
3
        DCHECK(frac_value < 100);
2965
        // since scale <= 2, overflow is impossiable
2966
3
        frac_value = frac_value * common::exp10_i32(2 - scale);
2967
3
    }
2968
2969
3
    if (frac_value == 100) {
2970
0
        if (is_negative) {
2971
0
            int_value -= 1;
2972
0
        } else {
2973
0
            int_value += 1;
2974
0
        }
2975
0
        frac_value = 0;
2976
0
    }
2977
2978
3
    bool append_sign_manually = false;
2979
3
    if (is_negative && int_value == 0) {
2980
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
2981
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
2982
        // this is why we introduce argument append_sing_manually.
2983
0
        append_sign_manually = true;
2984
0
    }
2985
2986
3
    char local[N];
2987
3
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
2988
3
    const Int32 integer_str_len = N - (p - local);
2989
3
    const Int32 frac_str_len = 2;
2990
3
    const Int32 whole_decimal_str_len =
2991
3
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
2992
2993
3
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
2994
    // Modify a string passed via stringref
2995
3
    char* result_data = const_cast<char*>(result.data);
2996
2997
3
    if (append_sign_manually) {
2998
0
        memset(result_data, '-', 1);
2999
0
    }
3000
3001
3
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3002
3
    *(result_data + whole_decimal_str_len - 3) = '.';
3003
3
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3004
3
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3005
3
    return result;
3006
3
};
_ZN5doris11MoneyFormat15do_money_formatInLm80EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
2949
14
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
2950
14
    static_assert(std::is_integral<T>::value);
2951
14
    const bool is_negative = int_value < 0 || frac_value < 0;
2952
2953
    // do round to frac_part
2954
    // magic number 2: since we need to round frac_part to 2 digits
2955
14
    if (scale > 2) {
2956
14
        DCHECK(scale <= 38);
2957
        // do rounding, so we need to reserve 3 digits.
2958
14
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
2959
        // do devide first to avoid overflow
2960
        // after round frac_value will be positive by design.
2961
14
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
2962
14
        frac_value /= 10;
2963
14
    } else if (scale < 2) {
2964
0
        DCHECK(frac_value < 100);
2965
        // since scale <= 2, overflow is impossiable
2966
0
        frac_value = frac_value * common::exp10_i32(2 - scale);
2967
0
    }
2968
2969
14
    if (frac_value == 100) {
2970
3
        if (is_negative) {
2971
2
            int_value -= 1;
2972
2
        } else {
2973
1
            int_value += 1;
2974
1
        }
2975
3
        frac_value = 0;
2976
3
    }
2977
2978
14
    bool append_sign_manually = false;
2979
14
    if (is_negative && int_value == 0) {
2980
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
2981
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
2982
        // this is why we introduce argument append_sing_manually.
2983
2
        append_sign_manually = true;
2984
2
    }
2985
2986
14
    char local[N];
2987
14
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
2988
14
    const Int32 integer_str_len = N - (p - local);
2989
14
    const Int32 frac_str_len = 2;
2990
14
    const Int32 whole_decimal_str_len =
2991
14
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
2992
2993
14
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
2994
    // Modify a string passed via stringref
2995
14
    char* result_data = const_cast<char*>(result.data);
2996
2997
14
    if (append_sign_manually) {
2998
2
        memset(result_data, '-', 1);
2999
2
    }
3000
3001
14
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3002
14
    *(result_data + whole_decimal_str_len - 3) = '.';
3003
14
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3004
14
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3005
14
    return result;
3006
14
};
Unexecuted instantiation: _ZN5doris11MoneyFormat15do_money_formatIlLm32EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
_ZN5doris11MoneyFormat15do_money_formatIlLm56EEENS_9StringRefEPNS_15FunctionContextEjT_S5_
Line
Count
Source
2949
5
StringRef do_money_format(FunctionContext* context, UInt32 scale, T int_value, T frac_value) {
2950
5
    static_assert(std::is_integral<T>::value);
2951
5
    const bool is_negative = int_value < 0 || frac_value < 0;
2952
2953
    // do round to frac_part
2954
    // magic number 2: since we need to round frac_part to 2 digits
2955
5
    if (scale > 2) {
2956
5
        DCHECK(scale <= 38);
2957
        // do rounding, so we need to reserve 3 digits.
2958
5
        auto multiplier = common::exp10_i128(std::abs(static_cast<int>(scale - 3)));
2959
        // do devide first to avoid overflow
2960
        // after round frac_value will be positive by design.
2961
5
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
2962
5
        frac_value /= 10;
2963
5
    } else if (scale < 2) {
2964
0
        DCHECK(frac_value < 100);
2965
        // since scale <= 2, overflow is impossiable
2966
0
        frac_value = frac_value * common::exp10_i32(2 - scale);
2967
0
    }
2968
2969
5
    if (frac_value == 100) {
2970
0
        if (is_negative) {
2971
0
            int_value -= 1;
2972
0
        } else {
2973
0
            int_value += 1;
2974
0
        }
2975
0
        frac_value = 0;
2976
0
    }
2977
2978
5
    bool append_sign_manually = false;
2979
5
    if (is_negative && int_value == 0) {
2980
        // when int_value is 0, result of SimpleItoaWithCommas will contains just zero
2981
        // for Decimal like -0.1234, this will leads to problem, because negative sign is discarded.
2982
        // this is why we introduce argument append_sing_manually.
2983
0
        append_sign_manually = true;
2984
0
    }
2985
2986
5
    char local[N];
2987
5
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
2988
5
    const Int32 integer_str_len = N - (p - local);
2989
5
    const Int32 frac_str_len = 2;
2990
5
    const Int32 whole_decimal_str_len =
2991
5
            (append_sign_manually ? 1 : 0) + integer_str_len + 1 + frac_str_len;
2992
2993
5
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
2994
    // Modify a string passed via stringref
2995
5
    char* result_data = const_cast<char*>(result.data);
2996
2997
5
    if (append_sign_manually) {
2998
0
        memset(result_data, '-', 1);
2999
0
    }
3000
3001
5
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3002
5
    *(result_data + whole_decimal_str_len - 3) = '.';
3003
5
    *(result_data + whole_decimal_str_len - 2) = '0' + std::abs(static_cast<int>(frac_value / 10));
3004
5
    *(result_data + whole_decimal_str_len - 1) = '0' + std::abs(static_cast<int>(frac_value % 10));
3005
5
    return result;
3006
5
};
3007
3008
// Note string value must be valid decimal string which contains two digits after the decimal point
3009
4
static StringRef do_money_format(FunctionContext* context, const std::string& value) {
3010
4
    bool is_positive = (value[0] != '-');
3011
4
    int32_t result_len = value.size() + (value.size() - (is_positive ? 4 : 5)) / 3;
3012
4
    StringRef result = context->create_temp_string_val(result_len);
3013
    // Modify a string passed via stringref
3014
4
    char* result_data = const_cast<char*>(result.data);
3015
4
    if (!is_positive) {
3016
2
        *result_data = '-';
3017
2
    }
3018
10
    for (int i = value.size() - 4, j = result_len - 4; i >= 0; i = i - 3) {
3019
9
        *(result_data + j) = *(value.data() + i);
3020
9
        if (i - 1 < 0) {
3021
2
            break;
3022
2
        }
3023
7
        *(result_data + j - 1) = *(value.data() + i - 1);
3024
7
        if (i - 2 < 0) {
3025
1
            break;
3026
1
        }
3027
6
        *(result_data + j - 2) = *(value.data() + i - 2);
3028
6
        if (j - 3 > 1 || (j - 3 == 1 && is_positive)) {
3029
4
            *(result_data + j - 3) = ',';
3030
4
            j -= 4;
3031
4
        } else {
3032
2
            j -= 3;
3033
2
        }
3034
6
    }
3035
4
    memcpy(result_data + result_len - 3, value.data() + value.size() - 3, 3);
3036
4
    return result;
3037
4
};
function_string.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Line
Count
Source
3009
4
static StringRef do_money_format(FunctionContext* context, const std::string& value) {
3010
4
    bool is_positive = (value[0] != '-');
3011
4
    int32_t result_len = value.size() + (value.size() - (is_positive ? 4 : 5)) / 3;
3012
4
    StringRef result = context->create_temp_string_val(result_len);
3013
    // Modify a string passed via stringref
3014
4
    char* result_data = const_cast<char*>(result.data);
3015
4
    if (!is_positive) {
3016
2
        *result_data = '-';
3017
2
    }
3018
10
    for (int i = value.size() - 4, j = result_len - 4; i >= 0; i = i - 3) {
3019
9
        *(result_data + j) = *(value.data() + i);
3020
9
        if (i - 1 < 0) {
3021
2
            break;
3022
2
        }
3023
7
        *(result_data + j - 1) = *(value.data() + i - 1);
3024
7
        if (i - 2 < 0) {
3025
1
            break;
3026
1
        }
3027
6
        *(result_data + j - 2) = *(value.data() + i - 2);
3028
6
        if (j - 3 > 1 || (j - 3 == 1 && is_positive)) {
3029
4
            *(result_data + j - 3) = ',';
3030
4
            j -= 4;
3031
4
        } else {
3032
2
            j -= 3;
3033
2
        }
3034
6
    }
3035
4
    memcpy(result_data + result_len - 3, value.data() + value.size() - 3, 3);
3036
4
    return result;
3037
4
};
Unexecuted instantiation: function_split_by_regexp.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: pipeline_fragment_context.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: iceberg_table_sink_operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: viceberg_table_writer.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: partition_transformers.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
Unexecuted instantiation: spill_iceberg_table_sink_operator.cpp:_ZN5doris11MoneyFormatL15do_money_formatEPNS_15FunctionContextERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
3038
3039
} // namespace MoneyFormat
3040
3041
namespace FormatRound {
3042
3043
0
constexpr size_t MAX_FORMAT_LEN_DEC32() {
3044
0
    // Decimal(9, 0)
3045
0
    // Double the size to avoid some unexpected bug.
3046
0
    return 2 * (1 + 9 + (9 / 3) + 3);
3047
0
}
3048
3049
0
constexpr size_t MAX_FORMAT_LEN_DEC64() {
3050
0
    // Decimal(18, 0)
3051
0
    // Double the size to avoid some unexpected bug.
3052
0
    return 2 * (1 + 18 + (18 / 3) + 3);
3053
0
}
3054
3055
0
constexpr size_t MAX_FORMAT_LEN_DEC128V2() {
3056
0
    // DecimalV2 has at most 27 digits
3057
0
    // Double the size to avoid some unexpected bug.
3058
0
    return 2 * (1 + 27 + (27 / 3) + 3);
3059
0
}
3060
3061
0
constexpr size_t MAX_FORMAT_LEN_DEC128V3() {
3062
0
    // Decimal(38, 0)
3063
0
    // Double the size to avoid some unexpected bug.
3064
0
    return 2 * (1 + 39 + (39 / 3) + 3);
3065
0
}
3066
3067
0
constexpr size_t MAX_FORMAT_LEN_INT64() {
3068
0
    // INT_MIN = -9223372036854775807
3069
0
    // Double the size to avoid some unexpected bug.
3070
0
    return 2 * (1 + 20 + (20 / 3) + 3);
3071
0
}
3072
3073
0
constexpr size_t MAX_FORMAT_LEN_INT128() {
3074
0
    // INT128_MIN = -170141183460469231731687303715884105728
3075
0
    return 2 * (1 + 39 + (39 / 3) + 3);
3076
0
}
3077
3078
template <typename T, size_t N>
3079
StringRef do_format_round(FunctionContext* context, UInt32 scale, T int_value, T frac_value,
3080
31
                          Int32 decimal_places) {
3081
31
    static_assert(std::is_integral<T>::value);
3082
31
    const bool is_negative = int_value < 0 || frac_value < 0;
3083
3084
    // do round to frac_part based on decimal_places
3085
31
    if (scale > decimal_places && decimal_places > 0) {
3086
14
        DCHECK(scale <= 38);
3087
        // do rounding, so we need to reserve decimal_places + 1 digits
3088
14
        auto multiplier =
3089
14
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3090
        // do divide first to avoid overflow
3091
        // after round frac_value will be positive by design
3092
14
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3093
14
        frac_value /= 10;
3094
17
    } else if (scale < decimal_places && decimal_places > 0) {
3095
        // since scale <= decimal_places, overflow is impossible
3096
13
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3097
13
    }
3098
3099
    // Calculate power of 10 for decimal_places
3100
31
    T decimal_power = common::exp10_i32(decimal_places);
3101
31
    if (frac_value == decimal_power) {
3102
0
        if (is_negative) {
3103
0
            int_value -= 1;
3104
0
        } else {
3105
0
            int_value += 1;
3106
0
        }
3107
0
        frac_value = 0;
3108
0
    }
3109
3110
31
    bool append_sign_manually = false;
3111
31
    if (is_negative && int_value == 0) {
3112
0
        append_sign_manually = true;
3113
0
    }
3114
3115
31
    char local[N];
3116
31
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3117
31
    const Int32 integer_str_len = N - (p - local);
3118
31
    const Int32 frac_str_len = decimal_places;
3119
31
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3120
31
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3121
3122
31
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3123
    // Modify a string passed via stringref
3124
31
    char* result_data = const_cast<char*>(result.data);
3125
3126
31
    if (append_sign_manually) {
3127
0
        memset(result_data, '-', 1);
3128
0
    }
3129
3130
31
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3131
31
    if (decimal_places > 0) {
3132
27
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3133
27
    }
3134
3135
    // Convert fractional part to string with proper padding
3136
31
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3137
139
    for (int i = 0; i <= decimal_places - 1; ++i) {
3138
108
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3139
108
        remaining_frac /= 10;
3140
108
    }
3141
31
    return result;
3142
31
}
_ZN5doris11FormatRound15do_format_roundIlLm60EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3080
8
                          Int32 decimal_places) {
3081
8
    static_assert(std::is_integral<T>::value);
3082
8
    const bool is_negative = int_value < 0 || frac_value < 0;
3083
3084
    // do round to frac_part based on decimal_places
3085
8
    if (scale > decimal_places && decimal_places > 0) {
3086
0
        DCHECK(scale <= 38);
3087
        // do rounding, so we need to reserve decimal_places + 1 digits
3088
0
        auto multiplier =
3089
0
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3090
        // do divide first to avoid overflow
3091
        // after round frac_value will be positive by design
3092
0
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3093
0
        frac_value /= 10;
3094
8
    } else if (scale < decimal_places && decimal_places > 0) {
3095
        // since scale <= decimal_places, overflow is impossible
3096
6
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3097
6
    }
3098
3099
    // Calculate power of 10 for decimal_places
3100
8
    T decimal_power = common::exp10_i32(decimal_places);
3101
8
    if (frac_value == decimal_power) {
3102
0
        if (is_negative) {
3103
0
            int_value -= 1;
3104
0
        } else {
3105
0
            int_value += 1;
3106
0
        }
3107
0
        frac_value = 0;
3108
0
    }
3109
3110
8
    bool append_sign_manually = false;
3111
8
    if (is_negative && int_value == 0) {
3112
0
        append_sign_manually = true;
3113
0
    }
3114
3115
8
    char local[N];
3116
8
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3117
8
    const Int32 integer_str_len = N - (p - local);
3118
8
    const Int32 frac_str_len = decimal_places;
3119
8
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3120
8
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3121
3122
8
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3123
    // Modify a string passed via stringref
3124
8
    char* result_data = const_cast<char*>(result.data);
3125
3126
8
    if (append_sign_manually) {
3127
0
        memset(result_data, '-', 1);
3128
0
    }
3129
3130
8
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3131
8
    if (decimal_places > 0) {
3132
6
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3133
6
    }
3134
3135
    // Convert fractional part to string with proper padding
3136
8
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3137
47
    for (int i = 0; i <= decimal_places - 1; ++i) {
3138
39
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3139
39
        remaining_frac /= 10;
3140
39
    }
3141
8
    return result;
3142
8
}
_ZN5doris11FormatRound15do_format_roundInLm112EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3080
9
                          Int32 decimal_places) {
3081
9
    static_assert(std::is_integral<T>::value);
3082
9
    const bool is_negative = int_value < 0 || frac_value < 0;
3083
3084
    // do round to frac_part based on decimal_places
3085
9
    if (scale > decimal_places && decimal_places > 0) {
3086
0
        DCHECK(scale <= 38);
3087
        // do rounding, so we need to reserve decimal_places + 1 digits
3088
0
        auto multiplier =
3089
0
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3090
        // do divide first to avoid overflow
3091
        // after round frac_value will be positive by design
3092
0
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3093
0
        frac_value /= 10;
3094
9
    } else if (scale < decimal_places && decimal_places > 0) {
3095
        // since scale <= decimal_places, overflow is impossible
3096
7
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3097
7
    }
3098
3099
    // Calculate power of 10 for decimal_places
3100
9
    T decimal_power = common::exp10_i32(decimal_places);
3101
9
    if (frac_value == decimal_power) {
3102
0
        if (is_negative) {
3103
0
            int_value -= 1;
3104
0
        } else {
3105
0
            int_value += 1;
3106
0
        }
3107
0
        frac_value = 0;
3108
0
    }
3109
3110
9
    bool append_sign_manually = false;
3111
9
    if (is_negative && int_value == 0) {
3112
0
        append_sign_manually = true;
3113
0
    }
3114
3115
9
    char local[N];
3116
9
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3117
9
    const Int32 integer_str_len = N - (p - local);
3118
9
    const Int32 frac_str_len = decimal_places;
3119
9
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3120
9
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3121
3122
9
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3123
    // Modify a string passed via stringref
3124
9
    char* result_data = const_cast<char*>(result.data);
3125
3126
9
    if (append_sign_manually) {
3127
0
        memset(result_data, '-', 1);
3128
0
    }
3129
3130
9
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3131
9
    if (decimal_places > 0) {
3132
7
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3133
7
    }
3134
3135
    // Convert fractional part to string with proper padding
3136
9
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3137
50
    for (int i = 0; i <= decimal_places - 1; ++i) {
3138
41
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3139
41
        remaining_frac /= 10;
3140
41
    }
3141
9
    return result;
3142
9
}
_ZN5doris11FormatRound15do_format_roundInLm80EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3080
3
                          Int32 decimal_places) {
3081
3
    static_assert(std::is_integral<T>::value);
3082
3
    const bool is_negative = int_value < 0 || frac_value < 0;
3083
3084
    // do round to frac_part based on decimal_places
3085
3
    if (scale > decimal_places && decimal_places > 0) {
3086
3
        DCHECK(scale <= 38);
3087
        // do rounding, so we need to reserve decimal_places + 1 digits
3088
3
        auto multiplier =
3089
3
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3090
        // do divide first to avoid overflow
3091
        // after round frac_value will be positive by design
3092
3
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3093
3
        frac_value /= 10;
3094
3
    } else if (scale < decimal_places && decimal_places > 0) {
3095
        // since scale <= decimal_places, overflow is impossible
3096
0
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3097
0
    }
3098
3099
    // Calculate power of 10 for decimal_places
3100
3
    T decimal_power = common::exp10_i32(decimal_places);
3101
3
    if (frac_value == decimal_power) {
3102
0
        if (is_negative) {
3103
0
            int_value -= 1;
3104
0
        } else {
3105
0
            int_value += 1;
3106
0
        }
3107
0
        frac_value = 0;
3108
0
    }
3109
3110
3
    bool append_sign_manually = false;
3111
3
    if (is_negative && int_value == 0) {
3112
0
        append_sign_manually = true;
3113
0
    }
3114
3115
3
    char local[N];
3116
3
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3117
3
    const Int32 integer_str_len = N - (p - local);
3118
3
    const Int32 frac_str_len = decimal_places;
3119
3
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3120
3
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3121
3122
3
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3123
    // Modify a string passed via stringref
3124
3
    char* result_data = const_cast<char*>(result.data);
3125
3126
3
    if (append_sign_manually) {
3127
0
        memset(result_data, '-', 1);
3128
0
    }
3129
3130
3
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3131
3
    if (decimal_places > 0) {
3132
3
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3133
3
    }
3134
3135
    // Convert fractional part to string with proper padding
3136
3
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3137
9
    for (int i = 0; i <= decimal_places - 1; ++i) {
3138
6
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3139
6
        remaining_frac /= 10;
3140
6
    }
3141
3
    return result;
3142
3
}
Unexecuted instantiation: _ZN5doris11FormatRound15do_format_roundIlLm32EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
_ZN5doris11FormatRound15do_format_roundIlLm56EEENS_9StringRefEPNS_15FunctionContextEjT_S5_i
Line
Count
Source
3080
11
                          Int32 decimal_places) {
3081
11
    static_assert(std::is_integral<T>::value);
3082
11
    const bool is_negative = int_value < 0 || frac_value < 0;
3083
3084
    // do round to frac_part based on decimal_places
3085
11
    if (scale > decimal_places && decimal_places > 0) {
3086
11
        DCHECK(scale <= 38);
3087
        // do rounding, so we need to reserve decimal_places + 1 digits
3088
11
        auto multiplier =
3089
11
                common::exp10_i128(std::abs(static_cast<int>(scale - (decimal_places + 1))));
3090
        // do divide first to avoid overflow
3091
        // after round frac_value will be positive by design
3092
11
        frac_value = std::abs(static_cast<int>(frac_value / multiplier)) + 5;
3093
11
        frac_value /= 10;
3094
11
    } else if (scale < decimal_places && decimal_places > 0) {
3095
        // since scale <= decimal_places, overflow is impossible
3096
0
        frac_value = frac_value * common::exp10_i32(decimal_places - scale);
3097
0
    }
3098
3099
    // Calculate power of 10 for decimal_places
3100
11
    T decimal_power = common::exp10_i32(decimal_places);
3101
11
    if (frac_value == decimal_power) {
3102
0
        if (is_negative) {
3103
0
            int_value -= 1;
3104
0
        } else {
3105
0
            int_value += 1;
3106
0
        }
3107
0
        frac_value = 0;
3108
0
    }
3109
3110
11
    bool append_sign_manually = false;
3111
11
    if (is_negative && int_value == 0) {
3112
0
        append_sign_manually = true;
3113
0
    }
3114
3115
11
    char local[N];
3116
11
    char* p = SimpleItoaWithCommas<T>(int_value, local, sizeof(local));
3117
11
    const Int32 integer_str_len = N - (p - local);
3118
11
    const Int32 frac_str_len = decimal_places;
3119
11
    const Int32 whole_decimal_str_len = (append_sign_manually ? 1 : 0) + integer_str_len +
3120
11
                                        (decimal_places > 0 ? 1 : 0) + frac_str_len;
3121
3122
11
    StringRef result = context->create_temp_string_val(whole_decimal_str_len);
3123
    // Modify a string passed via stringref
3124
11
    char* result_data = const_cast<char*>(result.data);
3125
3126
11
    if (append_sign_manually) {
3127
0
        memset(result_data, '-', 1);
3128
0
    }
3129
3130
11
    memcpy(result_data + (append_sign_manually ? 1 : 0), p, integer_str_len);
3131
11
    if (decimal_places > 0) {
3132
11
        *(result_data + whole_decimal_str_len - (frac_str_len + 1)) = '.';
3133
11
    }
3134
3135
    // Convert fractional part to string with proper padding
3136
11
    T remaining_frac = std::abs(static_cast<int>(frac_value));
3137
33
    for (int i = 0; i <= decimal_places - 1; ++i) {
3138
22
        *(result_data + whole_decimal_str_len - 1 - i) = '0' + (remaining_frac % 10);
3139
22
        remaining_frac /= 10;
3140
22
    }
3141
11
    return result;
3142
11
}
3143
3144
} // namespace FormatRound
3145
3146
struct MoneyFormatDoubleImpl {
3147
7
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeFloat64>()}; }
3148
3149
    static void execute(FunctionContext* context, ColumnString* result_column,
3150
1
                        const ColumnPtr col_ptr, size_t input_rows_count) {
3151
1
        const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get());
3152
        // when scale is above 38, we will go here
3153
5
        for (size_t i = 0; i < input_rows_count; i++) {
3154
            // round to 2 decimal places
3155
4
            double value =
3156
4
                    MathFunctions::my_double_round(data_column->get_element(i), 2, false, false);
3157
4
            StringRef str = MoneyFormat::do_money_format(context, fmt::format("{:.2f}", value));
3158
4
            result_column->insert_data(str.data, str.size);
3159
4
        }
3160
1
    }
3161
};
3162
3163
struct MoneyFormatInt64Impl {
3164
7
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt64>()}; }
3165
3166
    static void execute(FunctionContext* context, ColumnString* result_column,
3167
1
                        const ColumnPtr col_ptr, size_t input_rows_count) {
3168
1
        const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get());
3169
4
        for (size_t i = 0; i < input_rows_count; i++) {
3170
3
            Int64 value = data_column->get_element(i);
3171
3
            StringRef str =
3172
3
                    MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_INT64()>(
3173
3
                            context, 0, value, 0);
3174
3
            result_column->insert_data(str.data, str.size);
3175
3
        }
3176
1
    }
3177
};
3178
3179
struct MoneyFormatInt128Impl {
3180
7
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt128>()}; }
3181
3182
    static void execute(FunctionContext* context, ColumnString* result_column,
3183
1
                        const ColumnPtr col_ptr, size_t input_rows_count) {
3184
1
        const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get());
3185
        // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will
3186
        // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris,
3187
        // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124
3188
4
        for (size_t i = 0; i < input_rows_count; i++) {
3189
3
            Int128 value = data_column->get_element(i);
3190
3
            StringRef str =
3191
3
                    MoneyFormat::do_money_format<Int128, MoneyFormat::MAX_FORMAT_LEN_INT128()>(
3192
3
                            context, 0, value, 0);
3193
3
            result_column->insert_data(str.data, str.size);
3194
3
        }
3195
1
    }
3196
};
3197
3198
template <PrimitiveType Type>
3199
struct MoneyFormatDecimalImpl {
3200
35
    static DataTypes get_variadic_argument_types() {
3201
35
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
35
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EE27get_variadic_argument_typesEv
Line
Count
Source
3200
7
    static DataTypes get_variadic_argument_types() {
3201
7
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
7
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EE27get_variadic_argument_typesEv
Line
Count
Source
3200
7
    static DataTypes get_variadic_argument_types() {
3201
7
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
7
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EE27get_variadic_argument_typesEv
Line
Count
Source
3200
7
    static DataTypes get_variadic_argument_types() {
3201
7
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
7
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EE27get_variadic_argument_typesEv
Line
Count
Source
3200
7
    static DataTypes get_variadic_argument_types() {
3201
7
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
7
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EE27get_variadic_argument_typesEv
Line
Count
Source
3200
7
    static DataTypes get_variadic_argument_types() {
3201
7
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>()};
3202
7
    }
3203
3204
    static void execute(FunctionContext* context, ColumnString* result_column, ColumnPtr col_ptr,
3205
5
                        size_t input_rows_count) {
3206
5
        if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3207
16
            for (size_t i = 0; i < input_rows_count; i++) {
3208
14
                const auto& value = decimalv2_column->get_element(i);
3209
                // unified_frac_value has 3 digits
3210
14
                auto unified_frac_value = value.frac_value() / 1000000;
3211
14
                StringRef str =
3212
14
                        MoneyFormat::do_money_format<Int128,
3213
14
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>(
3214
14
                                context, 3, value.int_value(), unified_frac_value);
3215
3216
14
                result_column->insert_data(str.data, str.size);
3217
14
            }
3218
3
        } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3219
0
            const UInt32 scale = decimal32_column->get_scale();
3220
0
            for (size_t i = 0; i < input_rows_count; i++) {
3221
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3222
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3223
0
                StringRef str =
3224
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>(
3225
0
                                context, scale, static_cast<Int64>(whole_part),
3226
0
                                static_cast<Int64>(frac_part));
3227
3228
0
                result_column->insert_data(str.data, str.size);
3229
0
            }
3230
3
        } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3231
3
            const UInt32 scale = decimal64_column->get_scale();
3232
8
            for (size_t i = 0; i < input_rows_count; i++) {
3233
5
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3234
5
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3235
3236
5
                StringRef str =
3237
5
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>(
3238
5
                                context, scale, whole_part, frac_part);
3239
3240
5
                result_column->insert_data(str.data, str.size);
3241
5
            }
3242
3
        } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3243
0
            const UInt32 scale = decimal128_column->get_scale();
3244
0
            for (size_t i = 0; i < input_rows_count; i++) {
3245
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3246
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3247
3248
0
                StringRef str =
3249
0
                        MoneyFormat::do_money_format<Int128,
3250
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>(
3251
0
                                context, scale, whole_part, frac_part);
3252
3253
0
                result_column->insert_data(str.data, str.size);
3254
0
            }
3255
0
        } else {
3256
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
3257
0
                                   "Not supported input argument type {}", col_ptr->get_name());
3258
0
        }
3259
        // TODO: decimal256
3260
        /* else if (auto* decimal256_column =
3261
                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
3262
            const UInt32 scale = decimal256_column->get_scale();
3263
            const auto multiplier =
3264
                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
3265
            for (size_t i = 0; i < input_rows_count; i++) {
3266
                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
3267
                if (scale > 2) {
3268
                    int delta = ((frac_part % multiplier) << 1) > multiplier;
3269
                    frac_part = Decimal256(frac_part / multiplier + delta);
3270
                } else if (scale < 2) {
3271
                    frac_part = Decimal256(frac_part * multiplier);
3272
                }
3273
3274
                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
3275
                        context, decimal256_column->get_intergral_part(i), frac_part);
3276
3277
                result_column->insert_data(str.data, str.size);
3278
            }
3279
        }*/
3280
5
    }
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE20EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
Line
Count
Source
3205
2
                        size_t input_rows_count) {
3206
2
        if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3207
16
            for (size_t i = 0; i < input_rows_count; i++) {
3208
14
                const auto& value = decimalv2_column->get_element(i);
3209
                // unified_frac_value has 3 digits
3210
14
                auto unified_frac_value = value.frac_value() / 1000000;
3211
14
                StringRef str =
3212
14
                        MoneyFormat::do_money_format<Int128,
3213
14
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>(
3214
14
                                context, 3, value.int_value(), unified_frac_value);
3215
3216
14
                result_column->insert_data(str.data, str.size);
3217
14
            }
3218
2
        } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3219
0
            const UInt32 scale = decimal32_column->get_scale();
3220
0
            for (size_t i = 0; i < input_rows_count; i++) {
3221
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3222
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3223
0
                StringRef str =
3224
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>(
3225
0
                                context, scale, static_cast<Int64>(whole_part),
3226
0
                                static_cast<Int64>(frac_part));
3227
3228
0
                result_column->insert_data(str.data, str.size);
3229
0
            }
3230
0
        } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3231
0
            const UInt32 scale = decimal64_column->get_scale();
3232
0
            for (size_t i = 0; i < input_rows_count; i++) {
3233
0
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3234
0
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3235
3236
0
                StringRef str =
3237
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>(
3238
0
                                context, scale, whole_part, frac_part);
3239
3240
0
                result_column->insert_data(str.data, str.size);
3241
0
            }
3242
0
        } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3243
0
            const UInt32 scale = decimal128_column->get_scale();
3244
0
            for (size_t i = 0; i < input_rows_count; i++) {
3245
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3246
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3247
3248
0
                StringRef str =
3249
0
                        MoneyFormat::do_money_format<Int128,
3250
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>(
3251
0
                                context, scale, whole_part, frac_part);
3252
3253
0
                result_column->insert_data(str.data, str.size);
3254
0
            }
3255
0
        } else {
3256
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
3257
0
                                   "Not supported input argument type {}", col_ptr->get_name());
3258
0
        }
3259
        // TODO: decimal256
3260
        /* else if (auto* decimal256_column =
3261
                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
3262
            const UInt32 scale = decimal256_column->get_scale();
3263
            const auto multiplier =
3264
                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
3265
            for (size_t i = 0; i < input_rows_count; i++) {
3266
                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
3267
                if (scale > 2) {
3268
                    int delta = ((frac_part % multiplier) << 1) > multiplier;
3269
                    frac_part = Decimal256(frac_part / multiplier + delta);
3270
                } else if (scale < 2) {
3271
                    frac_part = Decimal256(frac_part * multiplier);
3272
                }
3273
3274
                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
3275
                        context, decimal256_column->get_intergral_part(i), frac_part);
3276
3277
                result_column->insert_data(str.data, str.size);
3278
            }
3279
        }*/
3280
2
    }
Unexecuted instantiation: _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE28EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
_ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE29EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
Line
Count
Source
3205
3
                        size_t input_rows_count) {
3206
3
        if (auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3207
0
            for (size_t i = 0; i < input_rows_count; i++) {
3208
0
                const auto& value = decimalv2_column->get_element(i);
3209
                // unified_frac_value has 3 digits
3210
0
                auto unified_frac_value = value.frac_value() / 1000000;
3211
0
                StringRef str =
3212
0
                        MoneyFormat::do_money_format<Int128,
3213
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V2()>(
3214
0
                                context, 3, value.int_value(), unified_frac_value);
3215
3216
0
                result_column->insert_data(str.data, str.size);
3217
0
            }
3218
3
        } else if (auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3219
0
            const UInt32 scale = decimal32_column->get_scale();
3220
0
            for (size_t i = 0; i < input_rows_count; i++) {
3221
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3222
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3223
0
                StringRef str =
3224
0
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC32()>(
3225
0
                                context, scale, static_cast<Int64>(whole_part),
3226
0
                                static_cast<Int64>(frac_part));
3227
3228
0
                result_column->insert_data(str.data, str.size);
3229
0
            }
3230
3
        } else if (auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3231
3
            const UInt32 scale = decimal64_column->get_scale();
3232
8
            for (size_t i = 0; i < input_rows_count; i++) {
3233
5
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3234
5
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3235
3236
5
                StringRef str =
3237
5
                        MoneyFormat::do_money_format<Int64, MoneyFormat::MAX_FORMAT_LEN_DEC64()>(
3238
5
                                context, scale, whole_part, frac_part);
3239
3240
5
                result_column->insert_data(str.data, str.size);
3241
5
            }
3242
3
        } else if (auto* decimal128_column = check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3243
0
            const UInt32 scale = decimal128_column->get_scale();
3244
0
            for (size_t i = 0; i < input_rows_count; i++) {
3245
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3246
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3247
3248
0
                StringRef str =
3249
0
                        MoneyFormat::do_money_format<Int128,
3250
0
                                                     MoneyFormat::MAX_FORMAT_LEN_DEC128V3()>(
3251
0
                                context, scale, whole_part, frac_part);
3252
3253
0
                result_column->insert_data(str.data, str.size);
3254
0
            }
3255
0
        } else {
3256
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
3257
0
                                   "Not supported input argument type {}", col_ptr->get_name());
3258
0
        }
3259
        // TODO: decimal256
3260
        /* else if (auto* decimal256_column =
3261
                           check_and_get_column<ColumnDecimal<Decimal256>>(*col_ptr)) {
3262
            const UInt32 scale = decimal256_column->get_scale();
3263
            const auto multiplier =
3264
                    scale > 2 ? common::exp10_i32(scale - 2) : common::exp10_i32(2 - scale);
3265
            for (size_t i = 0; i < input_rows_count; i++) {
3266
                Decimal256 frac_part = decimal256_column->get_fractional_part(i);
3267
                if (scale > 2) {
3268
                    int delta = ((frac_part % multiplier) << 1) > multiplier;
3269
                    frac_part = Decimal256(frac_part / multiplier + delta);
3270
                } else if (scale < 2) {
3271
                    frac_part = Decimal256(frac_part * multiplier);
3272
                }
3273
3274
                StringRef str = MoneyFormat::do_money_format<int64_t, 26>(
3275
                        context, decimal256_column->get_intergral_part(i), frac_part);
3276
3277
                result_column->insert_data(str.data, str.size);
3278
            }
3279
        }*/
3280
3
    }
Unexecuted instantiation: _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE30EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
Unexecuted instantiation: _ZN5doris22MoneyFormatDecimalImplILNS_13PrimitiveTypeE35EE7executeEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EEm
3281
};
3282
3283
struct FormatRoundDoubleImpl {
3284
7
    static DataTypes get_variadic_argument_types() {
3285
7
        return {std::make_shared<DataTypeFloat64>(), std::make_shared<DataTypeInt32>()};
3286
7
    }
3287
3288
4
    static std::string add_thousands_separator(const std::string& formatted_num) {
3289
        //  Find the position of the decimal point
3290
4
        size_t dot_pos = formatted_num.find('.');
3291
4
        if (dot_pos == std::string::npos) {
3292
0
            dot_pos = formatted_num.size();
3293
0
        }
3294
3295
        // Handle the integer part
3296
4
        int start = (formatted_num[0] == '-') ? 1 : 0;
3297
4
        int digit_count = dot_pos - start;
3298
3299
        // There is no need to add commas.
3300
4
        if (digit_count <= 3) {
3301
2
            return formatted_num;
3302
2
        }
3303
3304
2
        std::string result;
3305
3306
2
        if (start == 1) result += '-';
3307
3308
        // Add the integer part (with comma)
3309
2
        int first_group = digit_count % 3;
3310
2
        if (first_group == 0) first_group = 3;
3311
2
        result.append(formatted_num, start, first_group);
3312
3313
6
        for (size_t i = start + first_group; i < dot_pos; i += 3) {
3314
4
            result += ',';
3315
4
            result.append(formatted_num, i, 3);
3316
4
        }
3317
3318
        // Add the decimal part (keep as it is)
3319
2
        if (dot_pos != formatted_num.size()) {
3320
2
            result.append(formatted_num, dot_pos);
3321
2
        }
3322
3323
2
        return result;
3324
4
    }
3325
3326
    template <bool is_const>
3327
    static Status execute(FunctionContext* context, ColumnString* result_column,
3328
                          const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr,
3329
1
                          size_t input_rows_count) {
3330
1
        const auto& arg_column_data_2 =
3331
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3332
1
        const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get());
3333
        // when scale is above 38, we will go here
3334
5
        for (size_t i = 0; i < input_rows_count; i++) {
3335
4
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3336
4
            if (decimal_places < 0 || decimal_places > 1024) {
3337
0
                return Status::InvalidArgument(
3338
0
                        "The second argument is {}, it should be in range [0, 1024].",
3339
0
                        decimal_places);
3340
0
            }
3341
            // round to `decimal_places` decimal places
3342
4
            double value = MathFunctions::my_double_round(data_column->get_element(i),
3343
4
                                                          decimal_places, false, false);
3344
4
            std::string formatted_value = fmt::format("{:.{}f}", value, decimal_places);
3345
4
            if (std::isfinite(value)) {
3346
4
                result_column->insert_value(add_thousands_separator(formatted_value));
3347
4
            } else {
3348
                // if value is not finite, we just insert the original formatted value
3349
                // e.g. "inf", "-inf", "nan"
3350
0
                result_column->insert_value(formatted_value);
3351
0
            }
3352
4
        }
3353
1
        return Status::OK();
3354
1
    }
Unexecuted instantiation: _ZN5doris21FormatRoundDoubleImpl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
_ZN5doris21FormatRoundDoubleImpl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3329
1
                          size_t input_rows_count) {
3330
1
        const auto& arg_column_data_2 =
3331
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3332
1
        const auto* data_column = assert_cast<const ColumnFloat64*>(col_ptr.get());
3333
        // when scale is above 38, we will go here
3334
5
        for (size_t i = 0; i < input_rows_count; i++) {
3335
4
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3336
4
            if (decimal_places < 0 || decimal_places > 1024) {
3337
0
                return Status::InvalidArgument(
3338
0
                        "The second argument is {}, it should be in range [0, 1024].",
3339
0
                        decimal_places);
3340
0
            }
3341
            // round to `decimal_places` decimal places
3342
4
            double value = MathFunctions::my_double_round(data_column->get_element(i),
3343
4
                                                          decimal_places, false, false);
3344
4
            std::string formatted_value = fmt::format("{:.{}f}", value, decimal_places);
3345
4
            if (std::isfinite(value)) {
3346
4
                result_column->insert_value(add_thousands_separator(formatted_value));
3347
4
            } else {
3348
                // if value is not finite, we just insert the original formatted value
3349
                // e.g. "inf", "-inf", "nan"
3350
0
                result_column->insert_value(formatted_value);
3351
0
            }
3352
4
        }
3353
1
        return Status::OK();
3354
1
    }
3355
};
3356
3357
struct FormatRoundInt64Impl {
3358
7
    static DataTypes get_variadic_argument_types() {
3359
7
        return {std::make_shared<DataTypeInt64>(), std::make_shared<DataTypeInt32>()};
3360
7
    }
3361
3362
    template <bool is_const>
3363
    static Status execute(FunctionContext* context, ColumnString* result_column,
3364
                          const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr,
3365
1
                          size_t input_rows_count) {
3366
1
        const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get());
3367
1
        const auto& arg_column_data_2 =
3368
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3369
9
        for (size_t i = 0; i < input_rows_count; i++) {
3370
8
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3371
8
            if (decimal_places < 0 || decimal_places > 1024) {
3372
0
                return Status::InvalidArgument(
3373
0
                        "The second argument is {}, it should be in range [0, 1024].",
3374
0
                        decimal_places);
3375
0
            }
3376
8
            Int64 value = data_column->get_element(i);
3377
8
            StringRef str =
3378
8
                    FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_INT64()>(
3379
8
                            context, 0, value, 0, decimal_places);
3380
8
            result_column->insert_data(str.data, str.size);
3381
8
        }
3382
1
        return Status::OK();
3383
1
    }
Unexecuted instantiation: _ZN5doris20FormatRoundInt64Impl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
_ZN5doris20FormatRoundInt64Impl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3365
1
                          size_t input_rows_count) {
3366
1
        const auto* data_column = assert_cast<const ColumnInt64*>(col_ptr.get());
3367
1
        const auto& arg_column_data_2 =
3368
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3369
9
        for (size_t i = 0; i < input_rows_count; i++) {
3370
8
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3371
8
            if (decimal_places < 0 || decimal_places > 1024) {
3372
0
                return Status::InvalidArgument(
3373
0
                        "The second argument is {}, it should be in range [0, 1024].",
3374
0
                        decimal_places);
3375
0
            }
3376
8
            Int64 value = data_column->get_element(i);
3377
8
            StringRef str =
3378
8
                    FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_INT64()>(
3379
8
                            context, 0, value, 0, decimal_places);
3380
8
            result_column->insert_data(str.data, str.size);
3381
8
        }
3382
1
        return Status::OK();
3383
1
    }
3384
};
3385
3386
struct FormatRoundInt128Impl {
3387
7
    static DataTypes get_variadic_argument_types() {
3388
7
        return {std::make_shared<DataTypeInt128>(), std::make_shared<DataTypeInt32>()};
3389
7
    }
3390
3391
    template <bool is_const>
3392
    static Status execute(FunctionContext* context, ColumnString* result_column,
3393
                          const ColumnPtr col_ptr, ColumnPtr decimal_places_col_ptr,
3394
1
                          size_t input_rows_count) {
3395
1
        const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get());
3396
1
        const auto& arg_column_data_2 =
3397
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3398
        // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will
3399
        // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris,
3400
        // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124
3401
10
        for (size_t i = 0; i < input_rows_count; i++) {
3402
9
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3403
9
            if (decimal_places < 0 || decimal_places > 1024) {
3404
0
                return Status::InvalidArgument(
3405
0
                        "The second argument is {}, it should be in range [0, 1024].",
3406
0
                        decimal_places);
3407
0
            }
3408
9
            Int128 value = data_column->get_element(i);
3409
9
            StringRef str =
3410
9
                    FormatRound::do_format_round<Int128, FormatRound::MAX_FORMAT_LEN_INT128()>(
3411
9
                            context, 0, value, 0, decimal_places);
3412
9
            result_column->insert_data(str.data, str.size);
3413
9
        }
3414
1
        return Status::OK();
3415
1
    }
Unexecuted instantiation: _ZN5doris21FormatRoundInt128Impl7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
_ZN5doris21FormatRoundInt128Impl7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrIS9_EESC_m
Line
Count
Source
3394
1
                          size_t input_rows_count) {
3395
1
        const auto* data_column = assert_cast<const ColumnInt128*>(col_ptr.get());
3396
1
        const auto& arg_column_data_2 =
3397
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3398
        // SELECT money_format(170141183460469231731687303715884105728/*INT128_MAX + 1*/) will
3399
        // get "170,141,183,460,469,231,731,687,303,715,884,105,727.00" in doris,
3400
        // see https://github.com/apache/doris/blob/788abf2d7c3c7c2d57487a9608e889e7662d5fb2/be/src/vec/data_types/data_type_number_base.cpp#L124
3401
10
        for (size_t i = 0; i < input_rows_count; i++) {
3402
9
            int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3403
9
            if (decimal_places < 0 || decimal_places > 1024) {
3404
0
                return Status::InvalidArgument(
3405
0
                        "The second argument is {}, it should be in range [0, 1024].",
3406
0
                        decimal_places);
3407
0
            }
3408
9
            Int128 value = data_column->get_element(i);
3409
9
            StringRef str =
3410
9
                    FormatRound::do_format_round<Int128, FormatRound::MAX_FORMAT_LEN_INT128()>(
3411
9
                            context, 0, value, 0, decimal_places);
3412
9
            result_column->insert_data(str.data, str.size);
3413
9
        }
3414
1
        return Status::OK();
3415
1
    }
3416
};
3417
3418
template <PrimitiveType Type>
3419
struct FormatRoundDecimalImpl {
3420
35
    static DataTypes get_variadic_argument_types() {
3421
35
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3422
35
                std::make_shared<DataTypeInt32>()};
3423
35
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE27get_variadic_argument_typesEv
Line
Count
Source
3420
7
    static DataTypes get_variadic_argument_types() {
3421
7
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3422
7
                std::make_shared<DataTypeInt32>()};
3423
7
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE27get_variadic_argument_typesEv
Line
Count
Source
3420
7
    static DataTypes get_variadic_argument_types() {
3421
7
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3422
7
                std::make_shared<DataTypeInt32>()};
3423
7
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE27get_variadic_argument_typesEv
Line
Count
Source
3420
7
    static DataTypes get_variadic_argument_types() {
3421
7
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3422
7
                std::make_shared<DataTypeInt32>()};
3423
7
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE27get_variadic_argument_typesEv
Line
Count
Source
3420
7
    static DataTypes get_variadic_argument_types() {
3421
7
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3422
7
                std::make_shared<DataTypeInt32>()};
3423
7
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE27get_variadic_argument_typesEv
Line
Count
Source
3420
7
    static DataTypes get_variadic_argument_types() {
3421
7
        return {std::make_shared<typename PrimitiveTypeTraits<Type>::DataType>(),
3422
7
                std::make_shared<DataTypeInt32>()};
3423
7
    }
3424
3425
    template <bool is_const>
3426
    static Status execute(FunctionContext* context, ColumnString* result_column, ColumnPtr col_ptr,
3427
9
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3428
9
        const auto& arg_column_data_2 =
3429
9
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3430
9
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3431
4
            for (size_t i = 0; i < input_rows_count; i++) {
3432
3
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3433
3
                if (decimal_places < 0 || decimal_places > 1024) {
3434
0
                    return Status::InvalidArgument(
3435
0
                            "The second argument is {}, it should be in range [0, 1024].",
3436
0
                            decimal_places);
3437
0
                }
3438
3
                const auto& value = decimalv2_column->get_element(i);
3439
                // unified_frac_value has 3 digits
3440
3
                auto unified_frac_value = value.frac_value() / 1000000;
3441
3
                StringRef str =
3442
3
                        FormatRound::do_format_round<Int128,
3443
3
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3444
3
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3445
3446
3
                result_column->insert_data(str.data, str.size);
3447
3
            }
3448
8
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3449
0
            const UInt32 scale = decimal32_column->get_scale();
3450
0
            for (size_t i = 0; i < input_rows_count; i++) {
3451
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3452
0
                if (decimal_places < 0 || decimal_places > 1024) {
3453
0
                    return Status::InvalidArgument(
3454
0
                            "The second argument is {}, it should be in range [0, 1024].",
3455
0
                            decimal_places);
3456
0
                }
3457
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3458
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3459
0
                StringRef str =
3460
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3461
0
                                context, scale, static_cast<Int64>(whole_part),
3462
0
                                static_cast<Int64>(frac_part), decimal_places);
3463
3464
0
                result_column->insert_data(str.data, str.size);
3465
0
            }
3466
8
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3467
8
            const UInt32 scale = decimal64_column->get_scale();
3468
19
            for (size_t i = 0; i < input_rows_count; i++) {
3469
11
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3470
11
                if (decimal_places < 0 || decimal_places > 1024) {
3471
0
                    return Status::InvalidArgument(
3472
0
                            "The second argument is {}, it should be in range [0, 1024].",
3473
0
                            decimal_places);
3474
0
                }
3475
11
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3476
11
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3477
3478
11
                StringRef str =
3479
11
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3480
11
                                context, scale, whole_part, frac_part, decimal_places);
3481
3482
11
                result_column->insert_data(str.data, str.size);
3483
11
            }
3484
8
        } else if (const auto* decimal128_column =
3485
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3486
0
            const UInt32 scale = decimal128_column->get_scale();
3487
0
            for (size_t i = 0; i < input_rows_count; i++) {
3488
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3489
0
                if (decimal_places < 0 || decimal_places > 1024) {
3490
0
                    return Status::InvalidArgument(
3491
0
                            "The second argument is {}, it should be in range [0, 1024].",
3492
0
                            decimal_places);
3493
0
                }
3494
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3495
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3496
3497
0
                StringRef str =
3498
0
                        FormatRound::do_format_round<Int128,
3499
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3500
0
                                context, scale, whole_part, frac_part, decimal_places);
3501
3502
0
                result_column->insert_data(str.data, str.size);
3503
0
            }
3504
0
        } else {
3505
0
            return Status::InternalError("Not supported input argument type {}",
3506
0
                                         col_ptr->get_name());
3507
0
        }
3508
9
        return Status::OK();
3509
9
    }
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE20EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3427
1
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3428
1
        const auto& arg_column_data_2 =
3429
1
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3430
1
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3431
4
            for (size_t i = 0; i < input_rows_count; i++) {
3432
3
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3433
3
                if (decimal_places < 0 || decimal_places > 1024) {
3434
0
                    return Status::InvalidArgument(
3435
0
                            "The second argument is {}, it should be in range [0, 1024].",
3436
0
                            decimal_places);
3437
0
                }
3438
3
                const auto& value = decimalv2_column->get_element(i);
3439
                // unified_frac_value has 3 digits
3440
3
                auto unified_frac_value = value.frac_value() / 1000000;
3441
3
                StringRef str =
3442
3
                        FormatRound::do_format_round<Int128,
3443
3
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3444
3
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3445
3446
3
                result_column->insert_data(str.data, str.size);
3447
3
            }
3448
1
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3449
0
            const UInt32 scale = decimal32_column->get_scale();
3450
0
            for (size_t i = 0; i < input_rows_count; i++) {
3451
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3452
0
                if (decimal_places < 0 || decimal_places > 1024) {
3453
0
                    return Status::InvalidArgument(
3454
0
                            "The second argument is {}, it should be in range [0, 1024].",
3455
0
                            decimal_places);
3456
0
                }
3457
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3458
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3459
0
                StringRef str =
3460
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3461
0
                                context, scale, static_cast<Int64>(whole_part),
3462
0
                                static_cast<Int64>(frac_part), decimal_places);
3463
3464
0
                result_column->insert_data(str.data, str.size);
3465
0
            }
3466
0
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3467
0
            const UInt32 scale = decimal64_column->get_scale();
3468
0
            for (size_t i = 0; i < input_rows_count; i++) {
3469
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3470
0
                if (decimal_places < 0 || decimal_places > 1024) {
3471
0
                    return Status::InvalidArgument(
3472
0
                            "The second argument is {}, it should be in range [0, 1024].",
3473
0
                            decimal_places);
3474
0
                }
3475
0
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3476
0
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3477
3478
0
                StringRef str =
3479
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3480
0
                                context, scale, whole_part, frac_part, decimal_places);
3481
3482
0
                result_column->insert_data(str.data, str.size);
3483
0
            }
3484
0
        } else if (const auto* decimal128_column =
3485
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3486
0
            const UInt32 scale = decimal128_column->get_scale();
3487
0
            for (size_t i = 0; i < input_rows_count; i++) {
3488
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3489
0
                if (decimal_places < 0 || decimal_places > 1024) {
3490
0
                    return Status::InvalidArgument(
3491
0
                            "The second argument is {}, it should be in range [0, 1024].",
3492
0
                            decimal_places);
3493
0
                }
3494
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3495
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3496
3497
0
                StringRef str =
3498
0
                        FormatRound::do_format_round<Int128,
3499
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3500
0
                                context, scale, whole_part, frac_part, decimal_places);
3501
3502
0
                result_column->insert_data(str.data, str.size);
3503
0
            }
3504
0
        } else {
3505
0
            return Status::InternalError("Not supported input argument type {}",
3506
0
                                         col_ptr->get_name());
3507
0
        }
3508
1
        return Status::OK();
3509
1
    }
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE28EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3427
2
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3428
2
        const auto& arg_column_data_2 =
3429
2
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3430
2
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3431
0
            for (size_t i = 0; i < input_rows_count; i++) {
3432
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3433
0
                if (decimal_places < 0 || decimal_places > 1024) {
3434
0
                    return Status::InvalidArgument(
3435
0
                            "The second argument is {}, it should be in range [0, 1024].",
3436
0
                            decimal_places);
3437
0
                }
3438
0
                const auto& value = decimalv2_column->get_element(i);
3439
                // unified_frac_value has 3 digits
3440
0
                auto unified_frac_value = value.frac_value() / 1000000;
3441
0
                StringRef str =
3442
0
                        FormatRound::do_format_round<Int128,
3443
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3444
0
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3445
3446
0
                result_column->insert_data(str.data, str.size);
3447
0
            }
3448
2
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3449
0
            const UInt32 scale = decimal32_column->get_scale();
3450
0
            for (size_t i = 0; i < input_rows_count; i++) {
3451
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3452
0
                if (decimal_places < 0 || decimal_places > 1024) {
3453
0
                    return Status::InvalidArgument(
3454
0
                            "The second argument is {}, it should be in range [0, 1024].",
3455
0
                            decimal_places);
3456
0
                }
3457
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3458
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3459
0
                StringRef str =
3460
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3461
0
                                context, scale, static_cast<Int64>(whole_part),
3462
0
                                static_cast<Int64>(frac_part), decimal_places);
3463
3464
0
                result_column->insert_data(str.data, str.size);
3465
0
            }
3466
2
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3467
2
            const UInt32 scale = decimal64_column->get_scale();
3468
4
            for (size_t i = 0; i < input_rows_count; i++) {
3469
2
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3470
2
                if (decimal_places < 0 || decimal_places > 1024) {
3471
0
                    return Status::InvalidArgument(
3472
0
                            "The second argument is {}, it should be in range [0, 1024].",
3473
0
                            decimal_places);
3474
0
                }
3475
2
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3476
2
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3477
3478
2
                StringRef str =
3479
2
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3480
2
                                context, scale, whole_part, frac_part, decimal_places);
3481
3482
2
                result_column->insert_data(str.data, str.size);
3483
2
            }
3484
2
        } else if (const auto* decimal128_column =
3485
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3486
0
            const UInt32 scale = decimal128_column->get_scale();
3487
0
            for (size_t i = 0; i < input_rows_count; i++) {
3488
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3489
0
                if (decimal_places < 0 || decimal_places > 1024) {
3490
0
                    return Status::InvalidArgument(
3491
0
                            "The second argument is {}, it should be in range [0, 1024].",
3492
0
                            decimal_places);
3493
0
                }
3494
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3495
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3496
3497
0
                StringRef str =
3498
0
                        FormatRound::do_format_round<Int128,
3499
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3500
0
                                context, scale, whole_part, frac_part, decimal_places);
3501
3502
0
                result_column->insert_data(str.data, str.size);
3503
0
            }
3504
0
        } else {
3505
0
            return Status::InternalError("Not supported input argument type {}",
3506
0
                                         col_ptr->get_name());
3507
0
        }
3508
2
        return Status::OK();
3509
2
    }
_ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE29EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Line
Count
Source
3427
6
                          ColumnPtr decimal_places_col_ptr, size_t input_rows_count) {
3428
6
        const auto& arg_column_data_2 =
3429
6
                assert_cast<const ColumnInt32*>(decimal_places_col_ptr.get())->get_data();
3430
6
        if (const auto* decimalv2_column = check_and_get_column<ColumnDecimal128V2>(*col_ptr)) {
3431
0
            for (size_t i = 0; i < input_rows_count; i++) {
3432
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3433
0
                if (decimal_places < 0 || decimal_places > 1024) {
3434
0
                    return Status::InvalidArgument(
3435
0
                            "The second argument is {}, it should be in range [0, 1024].",
3436
0
                            decimal_places);
3437
0
                }
3438
0
                const auto& value = decimalv2_column->get_element(i);
3439
                // unified_frac_value has 3 digits
3440
0
                auto unified_frac_value = value.frac_value() / 1000000;
3441
0
                StringRef str =
3442
0
                        FormatRound::do_format_round<Int128,
3443
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V2()>(
3444
0
                                context, 3, value.int_value(), unified_frac_value, decimal_places);
3445
3446
0
                result_column->insert_data(str.data, str.size);
3447
0
            }
3448
6
        } else if (const auto* decimal32_column = check_and_get_column<ColumnDecimal32>(*col_ptr)) {
3449
0
            const UInt32 scale = decimal32_column->get_scale();
3450
0
            for (size_t i = 0; i < input_rows_count; i++) {
3451
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3452
0
                if (decimal_places < 0 || decimal_places > 1024) {
3453
0
                    return Status::InvalidArgument(
3454
0
                            "The second argument is {}, it should be in range [0, 1024].",
3455
0
                            decimal_places);
3456
0
                }
3457
0
                const Int32& frac_part = decimal32_column->get_fractional_part(i);
3458
0
                const Int32& whole_part = decimal32_column->get_intergral_part(i);
3459
0
                StringRef str =
3460
0
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC32()>(
3461
0
                                context, scale, static_cast<Int64>(whole_part),
3462
0
                                static_cast<Int64>(frac_part), decimal_places);
3463
3464
0
                result_column->insert_data(str.data, str.size);
3465
0
            }
3466
6
        } else if (const auto* decimal64_column = check_and_get_column<ColumnDecimal64>(*col_ptr)) {
3467
6
            const UInt32 scale = decimal64_column->get_scale();
3468
15
            for (size_t i = 0; i < input_rows_count; i++) {
3469
9
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3470
9
                if (decimal_places < 0 || decimal_places > 1024) {
3471
0
                    return Status::InvalidArgument(
3472
0
                            "The second argument is {}, it should be in range [0, 1024].",
3473
0
                            decimal_places);
3474
0
                }
3475
9
                const Int64& frac_part = decimal64_column->get_fractional_part(i);
3476
9
                const Int64& whole_part = decimal64_column->get_intergral_part(i);
3477
3478
9
                StringRef str =
3479
9
                        FormatRound::do_format_round<Int64, FormatRound::MAX_FORMAT_LEN_DEC64()>(
3480
9
                                context, scale, whole_part, frac_part, decimal_places);
3481
3482
9
                result_column->insert_data(str.data, str.size);
3483
9
            }
3484
6
        } else if (const auto* decimal128_column =
3485
0
                           check_and_get_column<ColumnDecimal128V3>(*col_ptr)) {
3486
0
            const UInt32 scale = decimal128_column->get_scale();
3487
0
            for (size_t i = 0; i < input_rows_count; i++) {
3488
0
                int32_t decimal_places = arg_column_data_2[index_check_const<is_const>(i)];
3489
0
                if (decimal_places < 0 || decimal_places > 1024) {
3490
0
                    return Status::InvalidArgument(
3491
0
                            "The second argument is {}, it should be in range [0, 1024].",
3492
0
                            decimal_places);
3493
0
                }
3494
0
                const Int128& frac_part = decimal128_column->get_fractional_part(i);
3495
0
                const Int128& whole_part = decimal128_column->get_intergral_part(i);
3496
3497
0
                StringRef str =
3498
0
                        FormatRound::do_format_round<Int128,
3499
0
                                                     FormatRound::MAX_FORMAT_LEN_DEC128V3()>(
3500
0
                                context, scale, whole_part, frac_part, decimal_places);
3501
3502
0
                result_column->insert_data(str.data, str.size);
3503
0
            }
3504
0
        } else {
3505
0
            return Status::InternalError("Not supported input argument type {}",
3506
0
                                         col_ptr->get_name());
3507
0
        }
3508
6
        return Status::OK();
3509
6
    }
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE30EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE7executeILb1EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
Unexecuted instantiation: _ZN5doris22FormatRoundDecimalImplILNS_13PrimitiveTypeE35EE7executeILb0EEENS_6StatusEPNS_15FunctionContextEPNS_9ColumnStrIjEENS_3COWINS_7IColumnEE13immutable_ptrISB_EESE_m
3510
};
3511
3512
class FunctionStringLocatePos : public IFunction {
3513
public:
3514
    static constexpr auto name = "locate";
3515
822
    static FunctionPtr create() { return std::make_shared<FunctionStringLocatePos>(); }
3516
0
    String get_name() const override { return name; }
3517
0
    size_t get_number_of_arguments() const override { return 3; }
3518
3519
814
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3520
814
        return std::make_shared<DataTypeInt32>();
3521
814
    }
3522
3523
7
    DataTypes get_variadic_argument_types_impl() const override {
3524
7
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3525
7
                std::make_shared<DataTypeInt32>()};
3526
7
    }
3527
3528
815
    bool is_variadic() const override { return true; }
3529
3530
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3531
576
                        uint32_t result, size_t input_rows_count) const override {
3532
576
        if (arguments.size() != 3) {
3533
0
            return Status::InvalidArgument("Function {} requires 3 arguments, but got {}",
3534
0
                                           get_name(), arguments.size());
3535
0
        }
3536
576
        bool col_const[3];
3537
576
        ColumnPtr argument_columns[3];
3538
2.30k
        for (int i = 0; i < 3; ++i) {
3539
1.72k
            std::tie(argument_columns[i], col_const[i]) =
3540
1.72k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3541
1.72k
        }
3542
3543
576
        const auto* col_left = assert_cast<const ColumnString*>(argument_columns[0].get());
3544
576
        const auto* col_right = assert_cast<const ColumnString*>(argument_columns[1].get());
3545
576
        const auto* col_pos = assert_cast<const ColumnInt32*>(argument_columns[2].get());
3546
3547
576
        ColumnInt32::MutablePtr col_res = ColumnInt32::create();
3548
576
        auto& vec_res = col_res->get_data();
3549
576
        vec_res.resize(block.rows());
3550
3551
576
        const bool is_ascii = col_left->is_ascii() && col_right->is_ascii();
3552
3553
576
        if (col_const[0]) {
3554
246
            std::visit(
3555
246
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
246
                        scalar_search<is_ascii, str_const, pos_const>(
3557
246
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
246
                                input_rows_count);
3559
246
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
3555
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
22
                        scalar_search<is_ascii, str_const, pos_const>(
3557
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
22
                                input_rows_count);
3559
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
3555
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
22
                        scalar_search<is_ascii, str_const, pos_const>(
3557
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
22
                                input_rows_count);
3559
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
3555
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
22
                        scalar_search<is_ascii, str_const, pos_const>(
3557
22
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
22
                                input_rows_count);
3559
22
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
3555
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
60
                        scalar_search<is_ascii, str_const, pos_const>(
3557
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
60
                                input_rows_count);
3559
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
3555
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
60
                        scalar_search<is_ascii, str_const, pos_const>(
3557
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
60
                                input_rows_count);
3559
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
3555
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3556
60
                        scalar_search<is_ascii, str_const, pos_const>(
3557
60
                                col_left->get_data_at(0), col_right, col_pos->get_data(), vec_res,
3558
60
                                input_rows_count);
3559
60
                    },
Unexecuted instantiation: _ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
3560
246
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
3561
246
                    make_bool_variant(col_const[2]));
3562
3563
330
        } else {
3564
330
            std::visit(
3565
330
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
330
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
330
                                                                      col_pos->get_data(), vec_res,
3568
330
                                                                      input_rows_count);
3569
330
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
3565
23
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
23
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
23
                                                                      col_pos->get_data(), vec_res,
3568
23
                                                                      input_rows_count);
3569
23
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESG_SF_IbLb1EEEEDaSA_SB_SC_
Line
Count
Source
3565
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
22
                                                                      col_pos->get_data(), vec_res,
3568
22
                                                                      input_rows_count);
3569
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESG_EEDaSA_SB_SC_
Line
Count
Source
3565
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
22
                                                                      col_pos->get_data(), vec_res,
3568
22
                                                                      input_rows_count);
3569
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb0EESF_IbLb1EESH_EEDaSA_SB_SC_
Line
Count
Source
3565
22
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
22
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
22
                                                                      col_pos->get_data(), vec_res,
3568
22
                                                                      input_rows_count);
3569
22
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESH_EEDaSA_SB_SC_
Line
Count
Source
3565
61
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
61
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
61
                                                                      col_pos->get_data(), vec_res,
3568
61
                                                                      input_rows_count);
3569
61
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESF_IbLb0EESG_EEDaSA_SB_SC_
Line
Count
Source
3565
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
60
                                                                      col_pos->get_data(), vec_res,
3568
60
                                                                      input_rows_count);
3569
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SF_IbLb0EEEEDaSA_SB_SC_
Line
Count
Source
3565
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
60
                                                                      col_pos->get_data(), vec_res,
3568
60
                                                                      input_rows_count);
3569
60
                    },
_ZZNK5doris23FunctionStringLocatePos12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E0_clISt17integral_constantIbLb1EESG_SG_EEDaSA_SB_SC_
Line
Count
Source
3565
60
                    [&](auto is_ascii, auto str_const, auto pos_const) {
3566
60
                        vector_search<is_ascii, str_const, pos_const>(col_left, col_right,
3567
60
                                                                      col_pos->get_data(), vec_res,
3568
60
                                                                      input_rows_count);
3569
60
                    },
3570
330
                    make_bool_variant(is_ascii), make_bool_variant(col_const[1]),
3571
330
                    make_bool_variant(col_const[2]));
3572
330
        }
3573
576
        block.replace_by_position(result, std::move(col_res));
3574
576
        return Status::OK();
3575
576
    }
3576
3577
private:
3578
    template <bool is_ascii, bool str_const, bool pos_const>
3579
    void scalar_search(const StringRef& ldata, const ColumnString* col_right,
3580
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
3581
246
                       size_t size) const {
3582
246
        res.resize(size);
3583
246
        StringRef substr(ldata.data, ldata.size);
3584
246
        StringSearch search {&substr};
3585
3586
492
        for (int i = 0; i < size; ++i) {
3587
246
            res[i] = locate_pos<is_ascii>(substr,
3588
246
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
246
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
246
        }
3591
246
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
22
                       size_t size) const {
3582
22
        res.resize(size);
3583
22
        StringRef substr(ldata.data, ldata.size);
3584
22
        StringSearch search {&substr};
3585
3586
44
        for (int i = 0; i < size; ++i) {
3587
22
            res[i] = locate_pos<is_ascii>(substr,
3588
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
22
        }
3591
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
22
                       size_t size) const {
3582
22
        res.resize(size);
3583
22
        StringRef substr(ldata.data, ldata.size);
3584
22
        StringSearch search {&substr};
3585
3586
44
        for (int i = 0; i < size; ++i) {
3587
22
            res[i] = locate_pos<is_ascii>(substr,
3588
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
22
        }
3591
22
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
22
                       size_t size) const {
3582
22
        res.resize(size);
3583
22
        StringRef substr(ldata.data, ldata.size);
3584
22
        StringSearch search {&substr};
3585
3586
44
        for (int i = 0; i < size; ++i) {
3587
22
            res[i] = locate_pos<is_ascii>(substr,
3588
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
22
        }
3591
22
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb0ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
60
                       size_t size) const {
3582
60
        res.resize(size);
3583
60
        StringRef substr(ldata.data, ldata.size);
3584
60
        StringSearch search {&substr};
3585
3586
120
        for (int i = 0; i < size; ++i) {
3587
60
            res[i] = locate_pos<is_ascii>(substr,
3588
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
60
        }
3591
60
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb0ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
60
                       size_t size) const {
3582
60
        res.resize(size);
3583
60
        StringRef substr(ldata.data, ldata.size);
3584
60
        StringSearch search {&substr};
3585
3586
120
        for (int i = 0; i < size; ++i) {
3587
60
            res[i] = locate_pos<is_ascii>(substr,
3588
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
60
        }
3591
60
    }
_ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb0EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
Line
Count
Source
3581
60
                       size_t size) const {
3582
60
        res.resize(size);
3583
60
        StringRef substr(ldata.data, ldata.size);
3584
60
        StringSearch search {&substr};
3585
3586
120
        for (int i = 0; i < size; ++i) {
3587
60
            res[i] = locate_pos<is_ascii>(substr,
3588
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3589
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3590
60
        }
3591
60
    }
Unexecuted instantiation: _ZNK5doris23FunctionStringLocatePos13scalar_searchILb1ELb1ELb1EEEvRKNS_9StringRefEPKNS_9ColumnStrIjEERKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSD_m
3592
3593
    template <bool is_ascii, bool str_const, bool pos_const>
3594
    void vector_search(const ColumnString* col_left, const ColumnString* col_right,
3595
                       const PaddedPODArray<Int32>& posdata, PaddedPODArray<Int32>& res,
3596
330
                       size_t size) const {
3597
330
        res.resize(size);
3598
330
        StringSearch search;
3599
774
        for (int i = 0; i < size; ++i) {
3600
444
            StringRef substr = col_left->get_data_at(i);
3601
444
            search.set_pattern(&substr);
3602
444
            res[i] = locate_pos<is_ascii>(substr,
3603
444
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
444
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
444
        }
3606
330
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
23
                       size_t size) const {
3597
23
        res.resize(size);
3598
23
        StringSearch search;
3599
71
        for (int i = 0; i < size; ++i) {
3600
48
            StringRef substr = col_left->get_data_at(i);
3601
48
            search.set_pattern(&substr);
3602
48
            res[i] = locate_pos<is_ascii>(substr,
3603
48
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
48
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
48
        }
3606
23
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
22
                       size_t size) const {
3597
22
        res.resize(size);
3598
22
        StringSearch search;
3599
44
        for (int i = 0; i < size; ++i) {
3600
22
            StringRef substr = col_left->get_data_at(i);
3601
22
            search.set_pattern(&substr);
3602
22
            res[i] = locate_pos<is_ascii>(substr,
3603
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
22
        }
3606
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
22
                       size_t size) const {
3597
22
        res.resize(size);
3598
22
        StringSearch search;
3599
44
        for (int i = 0; i < size; ++i) {
3600
22
            StringRef substr = col_left->get_data_at(i);
3601
22
            search.set_pattern(&substr);
3602
22
            res[i] = locate_pos<is_ascii>(substr,
3603
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
22
        }
3606
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
22
                       size_t size) const {
3597
22
        res.resize(size);
3598
22
        StringSearch search;
3599
44
        for (int i = 0; i < size; ++i) {
3600
22
            StringRef substr = col_left->get_data_at(i);
3601
22
            search.set_pattern(&substr);
3602
22
            res[i] = locate_pos<is_ascii>(substr,
3603
22
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
22
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
22
        }
3606
22
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
61
                       size_t size) const {
3597
61
        res.resize(size);
3598
61
        StringSearch search;
3599
211
        for (int i = 0; i < size; ++i) {
3600
150
            StringRef substr = col_left->get_data_at(i);
3601
150
            search.set_pattern(&substr);
3602
150
            res[i] = locate_pos<is_ascii>(substr,
3603
150
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
150
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
150
        }
3606
61
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
60
                       size_t size) const {
3597
60
        res.resize(size);
3598
60
        StringSearch search;
3599
120
        for (int i = 0; i < size; ++i) {
3600
60
            StringRef substr = col_left->get_data_at(i);
3601
60
            search.set_pattern(&substr);
3602
60
            res[i] = locate_pos<is_ascii>(substr,
3603
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
60
        }
3606
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
60
                       size_t size) const {
3597
60
        res.resize(size);
3598
60
        StringSearch search;
3599
120
        for (int i = 0; i < size; ++i) {
3600
60
            StringRef substr = col_left->get_data_at(i);
3601
60
            search.set_pattern(&substr);
3602
60
            res[i] = locate_pos<is_ascii>(substr,
3603
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
60
        }
3606
60
    }
_ZNK5doris23FunctionStringLocatePos13vector_searchILb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERSA_m
Line
Count
Source
3596
60
                       size_t size) const {
3597
60
        res.resize(size);
3598
60
        StringSearch search;
3599
120
        for (int i = 0; i < size; ++i) {
3600
60
            StringRef substr = col_left->get_data_at(i);
3601
60
            search.set_pattern(&substr);
3602
60
            res[i] = locate_pos<is_ascii>(substr,
3603
60
                                          col_right->get_data_at(index_check_const<str_const>(i)),
3604
60
                                          search, posdata[index_check_const<pos_const>(i)]);
3605
60
        }
3606
60
    }
3607
3608
    template <bool is_ascii>
3609
690
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
3610
690
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
3611
            // BEHAVIOR COMPATIBLE WITH MYSQL
3612
            // locate('','')  locate('','',1) locate('','',2)
3613
            // 1  1 0
3614
11
            return 1;
3615
11
        }
3616
679
        if (is_ascii) {
3617
499
            return locate_pos_ascii(substr, str, search, start_pos);
3618
499
        } else {
3619
180
            return locate_pos_utf8(substr, str, search, start_pos);
3620
180
        }
3621
679
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb0EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
3609
180
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
3610
180
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
3611
            // BEHAVIOR COMPATIBLE WITH MYSQL
3612
            // locate('','')  locate('','',1) locate('','',2)
3613
            // 1  1 0
3614
0
            return 1;
3615
0
        }
3616
180
        if (is_ascii) {
3617
0
            return locate_pos_ascii(substr, str, search, start_pos);
3618
180
        } else {
3619
180
            return locate_pos_utf8(substr, str, search, start_pos);
3620
180
        }
3621
180
    }
_ZNK5doris23FunctionStringLocatePos10locate_posILb1EEEiNS_9StringRefES2_RNS_12StringSearchEi
Line
Count
Source
3609
510
    int locate_pos(StringRef substr, StringRef str, StringSearch& search, int start_pos) const {
3610
510
        if (str.size == 0 && substr.size == 0 && start_pos == 1) {
3611
            // BEHAVIOR COMPATIBLE WITH MYSQL
3612
            // locate('','')  locate('','',1) locate('','',2)
3613
            // 1  1 0
3614
11
            return 1;
3615
11
        }
3616
499
        if (is_ascii) {
3617
499
            return locate_pos_ascii(substr, str, search, start_pos);
3618
499
        } else {
3619
0
            return locate_pos_utf8(substr, str, search, start_pos);
3620
0
        }
3621
499
    }
3622
3623
    int locate_pos_utf8(StringRef substr, StringRef str, StringSearch& search,
3624
180
                        int start_pos) const {
3625
180
        std::vector<size_t> index;
3626
180
        size_t char_len = simd::VStringFunctions::get_char_len(str.data, str.size, index);
3627
180
        if (start_pos <= 0 || start_pos > char_len) {
3628
43
            return 0;
3629
43
        }
3630
137
        if (substr.size == 0) {
3631
17
            return start_pos;
3632
17
        }
3633
        // Input start_pos starts from 1.
3634
120
        StringRef adjusted_str(str.data + index[start_pos - 1], str.size - index[start_pos - 1]);
3635
120
        int32_t match_pos = search.search(&adjusted_str);
3636
120
        if (match_pos >= 0) {
3637
            // Hive returns the position in the original string starting from 1.
3638
104
            return start_pos + simd::VStringFunctions::get_char_len(adjusted_str.data, match_pos);
3639
104
        } else {
3640
16
            return 0;
3641
16
        }
3642
120
    }
3643
3644
    int locate_pos_ascii(StringRef substr, StringRef str, StringSearch& search,
3645
499
                         int start_pos) const {
3646
499
        if (start_pos <= 0 || start_pos > str.size) {
3647
367
            return 0;
3648
367
        }
3649
132
        if (substr.size == 0) {
3650
36
            return start_pos;
3651
36
        }
3652
        // Input start_pos starts from 1.
3653
96
        StringRef adjusted_str(str.data + start_pos - 1, str.size - start_pos + 1);
3654
96
        int32_t match_pos = search.search(&adjusted_str);
3655
96
        if (match_pos >= 0) {
3656
            // Hive returns the position in the original string starting from 1.
3657
40
            return start_pos + match_pos;
3658
56
        } else {
3659
56
            return 0;
3660
56
        }
3661
96
    }
3662
};
3663
3664
struct ReplaceImpl {
3665
    static constexpr auto name = "replace";
3666
};
3667
3668
struct ReplaceEmptyImpl {
3669
    static constexpr auto name = "replace_empty";
3670
};
3671
3672
template <typename Impl, bool empty>
3673
class FunctionReplace : public IFunction {
3674
public:
3675
    static constexpr auto name = Impl::name;
3676
3.42k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
_ZN5doris15FunctionReplaceINS_11ReplaceImplELb1EE6createEv
Line
Count
Source
3676
1.90k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
_ZN5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE6createEv
Line
Count
Source
3676
1.52k
    static FunctionPtr create() { return std::make_shared<FunctionReplace<Impl, empty>>(); }
3677
2
    String get_name() const override { return name; }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE8get_nameB5cxx11Ev
Line
Count
Source
3677
1
    String get_name() const override { return name; }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE8get_nameB5cxx11Ev
Line
Count
Source
3677
1
    String get_name() const override { return name; }
3678
3.40k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE23get_number_of_argumentsEv
Line
Count
Source
3678
1.89k
    size_t get_number_of_arguments() const override { return 3; }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE23get_number_of_argumentsEv
Line
Count
Source
3678
1.51k
    size_t get_number_of_arguments() const override { return 3; }
3679
3680
3.40k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3681
3.40k
        return std::make_shared<DataTypeString>();
3682
3.40k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
3680
1.89k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3681
1.89k
        return std::make_shared<DataTypeString>();
3682
1.89k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Line
Count
Source
3680
1.51k
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3681
1.51k
        return std::make_shared<DataTypeString>();
3682
1.51k
    }
3683
3684
14
    DataTypes get_variadic_argument_types_impl() const override {
3685
14
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3686
14
                std::make_shared<DataTypeString>()};
3687
14
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE32get_variadic_argument_types_implEv
Line
Count
Source
3684
7
    DataTypes get_variadic_argument_types_impl() const override {
3685
7
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3686
7
                std::make_shared<DataTypeString>()};
3687
7
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE32get_variadic_argument_types_implEv
Line
Count
Source
3684
7
    DataTypes get_variadic_argument_types_impl() const override {
3685
7
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3686
7
                std::make_shared<DataTypeString>()};
3687
7
    }
3688
3689
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3690
2.12k
                        uint32_t result, size_t input_rows_count) const override {
3691
        // We need a local variable to hold a reference to the converted column.
3692
        // So that the converted column will not be released before we use it.
3693
2.12k
        ColumnPtr col[3];
3694
2.12k
        bool col_const[3];
3695
8.49k
        for (size_t i = 0; i < 3; ++i) {
3696
6.37k
            std::tie(col[i], col_const[i]) =
3697
6.37k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3698
6.37k
        }
3699
3700
2.12k
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
3701
2.12k
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
3702
2.12k
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
3703
3704
2.12k
        ColumnString::MutablePtr col_res = ColumnString::create();
3705
3706
2.12k
        std::visit(
3707
2.12k
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
7.54k
                    for (int i = 0; i < input_rows_count; ++i) {
3709
5.42k
                        StringRef origin_str =
3710
5.42k
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
5.42k
                        StringRef old_str =
3712
5.42k
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
5.42k
                        StringRef new_str =
3714
5.42k
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
5.42k
                        std::string result =
3717
5.42k
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
5.42k
                                        new_str.to_string_view());
3719
3720
5.42k
                        col_res->insert_data(result.data(), result.length());
3721
5.42k
                    }
3722
2.12k
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
3707
126
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
467
                    for (int i = 0; i < input_rows_count; ++i) {
3709
341
                        StringRef origin_str =
3710
341
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
341
                        StringRef old_str =
3712
341
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
341
                        StringRef new_str =
3714
341
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
341
                        std::string result =
3717
341
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
341
                                        new_str.to_string_view());
3719
3720
341
                        col_res->insert_data(result.data(), result.length());
3721
341
                    }
3722
126
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
3707
497
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
3.86k
                    for (int i = 0; i < input_rows_count; ++i) {
3709
3.36k
                        StringRef origin_str =
3710
3.36k
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
3.36k
                        StringRef old_str =
3712
3.36k
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
3.36k
                        StringRef new_str =
3714
3.36k
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
3.36k
                        std::string result =
3717
3.36k
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
3.36k
                                        new_str.to_string_view());
3719
3720
3.36k
                        col_res->insert_data(result.data(), result.length());
3721
3.36k
                    }
3722
497
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SI_EEDaSC_SD_SE_
Line
Count
Source
3707
126
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
467
                    for (int i = 0; i < input_rows_count; ++i) {
3709
341
                        StringRef origin_str =
3710
341
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
341
                        StringRef old_str =
3712
341
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
341
                        StringRef new_str =
3714
341
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
341
                        std::string result =
3717
341
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
341
                                        new_str.to_string_view());
3719
3720
341
                        col_res->insert_data(result.data(), result.length());
3721
341
                    }
3722
126
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESI_SH_IbLb1EEEEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESI_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb0EESH_IbLb1EESJ_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESJ_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESH_IbLb0EESI_EEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
_ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SH_IbLb0EEEEDaSC_SD_SE_
Line
Count
Source
3707
125
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
250
                    for (int i = 0; i < input_rows_count; ++i) {
3709
125
                        StringRef origin_str =
3710
125
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
125
                        StringRef old_str =
3712
125
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
125
                        StringRef new_str =
3714
125
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
125
                        std::string result =
3717
125
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
125
                                        new_str.to_string_view());
3719
3720
125
                        col_res->insert_data(result.data(), result.length());
3721
125
                    }
3722
125
                },
Unexecuted instantiation: _ZZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_E_clISt17integral_constantIbLb1EESI_SI_EEDaSC_SD_SE_
3723
2.12k
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3724
2.12k
                make_bool_variant(col_const[2]));
3725
3726
2.12k
        block.replace_by_position(result, std::move(col_res));
3727
2.12k
        return Status::OK();
3728
2.12k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
3690
1.24k
                        uint32_t result, size_t input_rows_count) const override {
3691
        // We need a local variable to hold a reference to the converted column.
3692
        // So that the converted column will not be released before we use it.
3693
1.24k
        ColumnPtr col[3];
3694
1.24k
        bool col_const[3];
3695
4.99k
        for (size_t i = 0; i < 3; ++i) {
3696
3.74k
            std::tie(col[i], col_const[i]) =
3697
3.74k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3698
3.74k
        }
3699
3700
1.24k
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
3701
1.24k
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
3702
1.24k
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
3703
3704
1.24k
        ColumnString::MutablePtr col_res = ColumnString::create();
3705
3706
1.24k
        std::visit(
3707
1.24k
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
1.24k
                    for (int i = 0; i < input_rows_count; ++i) {
3709
1.24k
                        StringRef origin_str =
3710
1.24k
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
1.24k
                        StringRef old_str =
3712
1.24k
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
1.24k
                        StringRef new_str =
3714
1.24k
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
1.24k
                        std::string result =
3717
1.24k
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
1.24k
                                        new_str.to_string_view());
3719
3720
1.24k
                        col_res->insert_data(result.data(), result.length());
3721
1.24k
                    }
3722
1.24k
                },
3723
1.24k
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3724
1.24k
                make_bool_variant(col_const[2]));
3725
3726
1.24k
        block.replace_by_position(result, std::move(col_res));
3727
1.24k
        return Status::OK();
3728
1.24k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
3690
876
                        uint32_t result, size_t input_rows_count) const override {
3691
        // We need a local variable to hold a reference to the converted column.
3692
        // So that the converted column will not be released before we use it.
3693
876
        ColumnPtr col[3];
3694
876
        bool col_const[3];
3695
3.50k
        for (size_t i = 0; i < 3; ++i) {
3696
2.62k
            std::tie(col[i], col_const[i]) =
3697
2.62k
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3698
2.62k
        }
3699
3700
876
        const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
3701
876
        const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
3702
876
        const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());
3703
3704
876
        ColumnString::MutablePtr col_res = ColumnString::create();
3705
3706
876
        std::visit(
3707
876
                [&](auto origin_str_const, auto old_str_const, auto new_str_const) {
3708
876
                    for (int i = 0; i < input_rows_count; ++i) {
3709
876
                        StringRef origin_str =
3710
876
                                col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
3711
876
                        StringRef old_str =
3712
876
                                col_old_str->get_data_at(index_check_const<old_str_const>(i));
3713
876
                        StringRef new_str =
3714
876
                                col_new_str->get_data_at(index_check_const<new_str_const>(i));
3715
3716
876
                        std::string result =
3717
876
                                replace(origin_str.to_string(), old_str.to_string_view(),
3718
876
                                        new_str.to_string_view());
3719
3720
876
                        col_res->insert_data(result.data(), result.length());
3721
876
                    }
3722
876
                },
3723
876
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3724
876
                make_bool_variant(col_const[2]));
3725
3726
876
        block.replace_by_position(result, std::move(col_res));
3727
876
        return Status::OK();
3728
876
    }
3729
3730
private:
3731
5.42k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
3732
5.42k
        if (old_str.empty()) {
3733
494
            if constexpr (empty) {
3734
247
                return str;
3735
247
            } else {
3736
                // Different from "Replace" only when the search string is empty.
3737
                // it will insert `new_str` in front of every character and at the end of the old str.
3738
247
                if (new_str.empty()) {
3739
59
                    return str;
3740
59
                }
3741
188
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
3742
188
                    std::string result;
3743
188
                    ColumnString::check_chars_length(
3744
188
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
3745
188
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3746
648
                    for (char c : str) {
3747
648
                        result += new_str;
3748
648
                        result += c;
3749
648
                    }
3750
188
                    result += new_str;
3751
188
                    return result;
3752
188
                } else {
3753
0
                    std::string result;
3754
0
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3755
0
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
3756
0
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
3757
0
                        result += new_str;
3758
0
                        result.append(&str[i], utf8_char_len);
3759
0
                    }
3760
0
                    result += new_str;
3761
0
                    ColumnString::check_chars_length(result.size(), 0);
3762
0
                    return result;
3763
0
                }
3764
188
            }
3765
4.93k
        } else {
3766
4.93k
            std::string::size_type pos = 0;
3767
4.93k
            std::string::size_type oldLen = old_str.size();
3768
4.93k
            std::string::size_type newLen = new_str.size();
3769
6.06k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
3770
1.13k
                str.replace(pos, oldLen, new_str);
3771
1.13k
                pos += newLen;
3772
1.13k
            }
3773
4.93k
            return str;
3774
4.93k
        }
3775
5.42k
    }
_ZNK5doris15FunctionReplaceINS_11ReplaceImplELb1EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_
Line
Count
Source
3731
4.33k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
3732
4.33k
        if (old_str.empty()) {
3733
247
            if constexpr (empty) {
3734
247
                return str;
3735
            } else {
3736
                // Different from "Replace" only when the search string is empty.
3737
                // it will insert `new_str` in front of every character and at the end of the old str.
3738
                if (new_str.empty()) {
3739
                    return str;
3740
                }
3741
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
3742
                    std::string result;
3743
                    ColumnString::check_chars_length(
3744
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
3745
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3746
                    for (char c : str) {
3747
                        result += new_str;
3748
                        result += c;
3749
                    }
3750
                    result += new_str;
3751
                    return result;
3752
                } else {
3753
                    std::string result;
3754
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3755
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
3756
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
3757
                        result += new_str;
3758
                        result.append(&str[i], utf8_char_len);
3759
                    }
3760
                    result += new_str;
3761
                    ColumnString::check_chars_length(result.size(), 0);
3762
                    return result;
3763
                }
3764
            }
3765
4.08k
        } else {
3766
4.08k
            std::string::size_type pos = 0;
3767
4.08k
            std::string::size_type oldLen = old_str.size();
3768
4.08k
            std::string::size_type newLen = new_str.size();
3769
4.89k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
3770
808
                str.replace(pos, oldLen, new_str);
3771
808
                pos += newLen;
3772
808
            }
3773
4.08k
            return str;
3774
4.08k
        }
3775
4.33k
    }
_ZNK5doris15FunctionReplaceINS_16ReplaceEmptyImplELb0EE7replaceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEESt17basic_string_viewIcS6_ESA_
Line
Count
Source
3731
1.09k
    std::string replace(std::string str, std::string_view old_str, std::string_view new_str) const {
3732
1.09k
        if (old_str.empty()) {
3733
            if constexpr (empty) {
3734
                return str;
3735
247
            } else {
3736
                // Different from "Replace" only when the search string is empty.
3737
                // it will insert `new_str` in front of every character and at the end of the old str.
3738
247
                if (new_str.empty()) {
3739
59
                    return str;
3740
59
                }
3741
188
                if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
3742
188
                    std::string result;
3743
188
                    ColumnString::check_chars_length(
3744
188
                            str.length() * (new_str.length() + 1) + new_str.length(), 0);
3745
188
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3746
648
                    for (char c : str) {
3747
648
                        result += new_str;
3748
648
                        result += c;
3749
648
                    }
3750
188
                    result += new_str;
3751
188
                    return result;
3752
188
                } else {
3753
0
                    std::string result;
3754
0
                    result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
3755
0
                    for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
3756
0
                        utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
3757
0
                        result += new_str;
3758
0
                        result.append(&str[i], utf8_char_len);
3759
0
                    }
3760
0
                    result += new_str;
3761
0
                    ColumnString::check_chars_length(result.size(), 0);
3762
0
                    return result;
3763
0
                }
3764
188
            }
3765
844
        } else {
3766
844
            std::string::size_type pos = 0;
3767
844
            std::string::size_type oldLen = old_str.size();
3768
844
            std::string::size_type newLen = new_str.size();
3769
1.17k
            while ((pos = str.find(old_str, pos)) != std::string::npos) {
3770
328
                str.replace(pos, oldLen, new_str);
3771
328
                pos += newLen;
3772
328
            }
3773
844
            return str;
3774
844
        }
3775
1.09k
    }
3776
};
3777
3778
struct ReverseImpl {
3779
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
3780
53
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
3781
53
        auto rows_count = offsets.size();
3782
53
        res_offsets.resize(rows_count);
3783
53
        res_data.reserve(data.size());
3784
158
        for (ssize_t i = 0; i < rows_count; ++i) {
3785
105
            auto src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
3786
105
            int64_t src_len = offsets[i] - offsets[i - 1];
3787
105
            std::string dst;
3788
105
            dst.resize(src_len);
3789
105
            simd::VStringFunctions::reverse(StringRef((uint8_t*)src_str, src_len), &dst);
3790
105
            StringOP::push_value_string(std::string_view(dst.data(), src_len), i, res_data,
3791
105
                                        res_offsets);
3792
105
        }
3793
53
        return Status::OK();
3794
53
    }
3795
};
3796
3797
template <typename Impl>
3798
class FunctionSubReplace : public IFunction {
3799
public:
3800
    static constexpr auto name = "sub_replace";
3801
3802
16
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
_ZN5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE6createEv
Line
Count
Source
3802
8
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
_ZN5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE6createEv
Line
Count
Source
3802
8
    static FunctionPtr create() { return std::make_shared<FunctionSubReplace<Impl>>(); }
3803
3804
0
    String get_name() const override { return name; }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE8get_nameB5cxx11Ev
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE8get_nameB5cxx11Ev
3805
3806
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
3807
0
        return make_nullable(std::make_shared<DataTypeString>());
3808
0
    }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS7_EE
3809
3810
2
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE11is_variadicEv
Line
Count
Source
3810
1
    bool is_variadic() const override { return true; }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE11is_variadicEv
Line
Count
Source
3810
1
    bool is_variadic() const override { return true; }
3811
3812
14
    DataTypes get_variadic_argument_types_impl() const override {
3813
14
        return Impl::get_variadic_argument_types();
3814
14
    }
_ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE32get_variadic_argument_types_implEv
Line
Count
Source
3812
7
    DataTypes get_variadic_argument_types_impl() const override {
3813
7
        return Impl::get_variadic_argument_types();
3814
7
    }
_ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE32get_variadic_argument_types_implEv
Line
Count
Source
3812
7
    DataTypes get_variadic_argument_types_impl() const override {
3813
7
        return Impl::get_variadic_argument_types();
3814
7
    }
3815
3816
0
    size_t get_number_of_arguments() const override {
3817
0
        return get_variadic_argument_types_impl().size();
3818
0
    }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE23get_number_of_argumentsEv
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE23get_number_of_argumentsEv
3819
3820
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
3821
0
                        uint32_t result, size_t input_rows_count) const override {
3822
0
        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
3823
0
    }
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_19SubReplaceThreeImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Unexecuted instantiation: _ZNK5doris18FunctionSubReplaceINS_18SubReplaceFourImplEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
3824
};
3825
3826
struct SubReplaceImpl {
3827
    static Status replace_execute(Block& block, const ColumnNumbers& arguments, uint32_t result,
3828
1
                                  size_t input_rows_count) {
3829
1
        auto res_column = ColumnString::create();
3830
1
        auto* result_column = assert_cast<ColumnString*>(res_column.get());
3831
1
        auto args_null_map = ColumnUInt8::create(input_rows_count, 0);
3832
1
        ColumnPtr argument_columns[4];
3833
1
        bool col_const[4];
3834
5
        for (int i = 0; i < 4; ++i) {
3835
4
            std::tie(argument_columns[i], col_const[i]) =
3836
4
                    unpack_if_const(block.get_by_position(arguments[i]).column);
3837
4
        }
3838
1
        const auto* data_column = assert_cast<const ColumnString*>(argument_columns[0].get());
3839
1
        const auto* mask_column = assert_cast<const ColumnString*>(argument_columns[1].get());
3840
1
        const auto* start_column = assert_cast<const ColumnInt32*>(argument_columns[2].get());
3841
1
        const auto* length_column = assert_cast<const ColumnInt32*>(argument_columns[3].get());
3842
3843
1
        std::visit(
3844
1
                [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) {
3845
1
                    if (data_column->is_ascii()) {
3846
1
                        vector_ascii<origin_str_const, new_str_const, start_const, len_const>(
3847
1
                                data_column, mask_column, start_column->get_data(),
3848
1
                                length_column->get_data(), args_null_map->get_data(), result_column,
3849
1
                                input_rows_count);
3850
1
                    } else {
3851
0
                        vector_utf8<origin_str_const, new_str_const, start_const, len_const>(
3852
0
                                data_column, mask_column, start_column->get_data(),
3853
0
                                length_column->get_data(), args_null_map->get_data(), result_column,
3854
0
                                input_rows_count);
3855
0
                    }
3856
1
                },
_ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SF_EEDaS8_S9_SA_SB_
Line
Count
Source
3844
1
                [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) {
3845
1
                    if (data_column->is_ascii()) {
3846
1
                        vector_ascii<origin_str_const, new_str_const, start_const, len_const>(
3847
1
                                data_column, mask_column, start_column->get_data(),
3848
1
                                length_column->get_data(), args_null_map->get_data(), result_column,
3849
1
                                input_rows_count);
3850
1
                    } else {
3851
0
                        vector_utf8<origin_str_const, new_str_const, start_const, len_const>(
3852
0
                                data_column, mask_column, start_column->get_data(),
3853
0
                                length_column->get_data(), args_null_map->get_data(), result_column,
3854
0
                                input_rows_count);
3855
0
                    }
3856
1
                },
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SF_SE_IbLb1EEEEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESF_SE_IbLb1EESG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESF_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESE_IbLb1EESG_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESG_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESE_IbLb0EESF_SF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESG_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SE_IbLb0EESF_EEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SE_IbLb0EEEEDaS8_S9_SA_SB_
Unexecuted instantiation: _ZZN5doris14SubReplaceImpl15replace_executeERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESF_SF_SF_EEDaS8_S9_SA_SB_
3857
1
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
3858
1
                make_bool_variant(col_const[2]), make_bool_variant(col_const[3]));
3859
1
        block.get_by_position(result).column =
3860
1
                ColumnNullable::create(std::move(res_column), std::move(args_null_map));
3861
1
        return Status::OK();
3862
1
    }
3863
3864
private:
3865
    template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const>
3866
    static void vector_ascii(const ColumnString* data_column, const ColumnString* mask_column,
3867
                             const PaddedPODArray<Int32>& args_start,
3868
                             const PaddedPODArray<Int32>& args_length, NullMap& args_null_map,
3869
1
                             ColumnString* result_column, size_t input_rows_count) {
3870
1
        ColumnString::Chars& res_chars = result_column->get_chars();
3871
1
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
3872
10.2k
        for (size_t row = 0; row < input_rows_count; ++row) {
3873
10.2k
            StringRef origin_str =
3874
10.2k
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
3875
10.2k
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
3876
10.2k
            const auto start = args_start[index_check_const<start_const>(row)];
3877
10.2k
            const auto length = args_length[index_check_const<len_const>(row)];
3878
10.2k
            const size_t origin_str_len = origin_str.size;
3879
            //input is null, start < 0, len < 0, str_size <= start. return NULL
3880
10.2k
            if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) {
3881
10.2k
                res_offsets.push_back(res_chars.size());
3882
10.2k
                args_null_map[row] = 1;
3883
10.2k
            } else {
3884
0
                std::string_view replace_str = new_str.to_string_view();
3885
0
                std::string result = origin_str.to_string();
3886
0
                result.replace(start, length, replace_str);
3887
0
                result_column->insert_data(result.data(), result.length());
3888
0
            }
3889
10.2k
        }
3890
1
    }
_ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Line
Count
Source
3869
1
                             ColumnString* result_column, size_t input_rows_count) {
3870
1
        ColumnString::Chars& res_chars = result_column->get_chars();
3871
1
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
3872
10.2k
        for (size_t row = 0; row < input_rows_count; ++row) {
3873
10.2k
            StringRef origin_str =
3874
10.2k
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
3875
10.2k
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
3876
10.2k
            const auto start = args_start[index_check_const<start_const>(row)];
3877
10.2k
            const auto length = args_length[index_check_const<len_const>(row)];
3878
10.2k
            const size_t origin_str_len = origin_str.size;
3879
            //input is null, start < 0, len < 0, str_size <= start. return NULL
3880
10.2k
            if (args_null_map[row] || start < 0 || length < 0 || origin_str_len <= start) {
3881
10.2k
                res_offsets.push_back(res_chars.size());
3882
10.2k
                args_null_map[row] = 1;
3883
10.2k
            } else {
3884
0
                std::string_view replace_str = new_str.to_string_view();
3885
0
                std::string result = origin_str.to_string();
3886
0
                result.replace(start, length, replace_str);
3887
0
                result_column->insert_data(result.data(), result.length());
3888
0
            }
3889
10.2k
        }
3890
1
    }
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
3891
3892
    template <bool origin_str_const, bool new_str_const, bool start_const, bool len_const>
3893
    static void vector_utf8(const ColumnString* data_column, const ColumnString* mask_column,
3894
                            const PaddedPODArray<Int32>& args_start,
3895
                            const PaddedPODArray<Int32>& args_length, NullMap& args_null_map,
3896
0
                            ColumnString* result_column, size_t input_rows_count) {
3897
0
        ColumnString::Chars& res_chars = result_column->get_chars();
3898
0
        ColumnString::Offsets& res_offsets = result_column->get_offsets();
3899
3900
0
        for (size_t row = 0; row < input_rows_count; ++row) {
3901
0
            StringRef origin_str =
3902
0
                    data_column->get_data_at(index_check_const<origin_str_const>(row));
3903
0
            StringRef new_str = mask_column->get_data_at(index_check_const<new_str_const>(row));
3904
0
            const auto start = args_start[index_check_const<start_const>(row)];
3905
0
            const auto length = args_length[index_check_const<len_const>(row)];
3906
            //input is null, start < 0, len < 0 return NULL
3907
0
            if (args_null_map[row] || start < 0 || length < 0) {
3908
0
                res_offsets.push_back(res_chars.size());
3909
0
                args_null_map[row] = 1;
3910
0
                continue;
3911
0
            }
3912
3913
0
            const auto [start_byte_len, start_char_len] =
3914
0
                    simd::VStringFunctions::iterate_utf8_with_limit_length(origin_str.begin(),
3915
0
                                                                           origin_str.end(), start);
3916
3917
            // start >= orgin.size
3918
0
            DCHECK(start_char_len <= start);
3919
0
            if (start_byte_len == origin_str.size) {
3920
0
                res_offsets.push_back(res_chars.size());
3921
0
                args_null_map[row] = 1;
3922
0
                continue;
3923
0
            }
3924
3925
0
            auto [end_byte_len, end_char_len] =
3926
0
                    simd::VStringFunctions::iterate_utf8_with_limit_length(
3927
0
                            origin_str.begin() + start_byte_len, origin_str.end(), length);
3928
0
            DCHECK(end_char_len <= length);
3929
0
            std::string_view replace_str = new_str.to_string_view();
3930
0
            std::string result = origin_str.to_string();
3931
0
            result.replace(start_byte_len, end_byte_len, replace_str);
3932
0
            result_column->insert_data(result.data(), result.length());
3933
0
        }
3934
0
    }
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
Unexecuted instantiation: _ZN5doris14SubReplaceImpl11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEES5_RKNS_8PODArrayIiLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEESC_RNS6_IhLm4096ES9_Lm16ELm15EEEPS3_m
3935
};
3936
3937
struct SubReplaceThreeImpl {
3938
7
    static DataTypes get_variadic_argument_types() {
3939
7
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3940
7
                std::make_shared<DataTypeInt32>()};
3941
7
    }
3942
3943
    static Status execute_impl(FunctionContext* context, Block& block,
3944
                               const ColumnNumbers& arguments, uint32_t result,
3945
0
                               size_t input_rows_count) {
3946
0
        auto params = ColumnInt32::create(input_rows_count);
3947
0
        auto& strlen_data = params->get_data();
3948
3949
0
        auto str_col =
3950
0
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
3951
0
        if (const auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
3952
0
            str_col = nullable->get_nested_column_ptr();
3953
0
        }
3954
0
        const auto* str_column = assert_cast<const ColumnString*>(str_col.get());
3955
        // use utf8 len
3956
0
        for (int i = 0; i < input_rows_count; ++i) {
3957
0
            StringRef str_ref = str_column->get_data_at(i);
3958
0
            strlen_data[i] = simd::VStringFunctions::get_char_len(str_ref.data, str_ref.size);
3959
0
        }
3960
3961
0
        block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"});
3962
0
        ColumnNumbers temp_arguments = {arguments[0], arguments[1], arguments[2],
3963
0
                                        block.columns() - 1};
3964
0
        return SubReplaceImpl::replace_execute(block, temp_arguments, result, input_rows_count);
3965
0
    }
3966
};
3967
3968
struct SubReplaceFourImpl {
3969
7
    static DataTypes get_variadic_argument_types() {
3970
7
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
3971
7
                std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeInt32>()};
3972
7
    }
3973
3974
    static Status execute_impl(FunctionContext* context, Block& block,
3975
                               const ColumnNumbers& arguments, uint32_t result,
3976
0
                               size_t input_rows_count) {
3977
0
        return SubReplaceImpl::replace_execute(block, arguments, result, input_rows_count);
3978
0
    }
3979
};
3980
3981
class FunctionConvertTo : public IFunction {
3982
public:
3983
    static constexpr auto name = "convert_to";
3984
3985
8
    static FunctionPtr create() { return std::make_shared<FunctionConvertTo>(); }
3986
3987
1
    String get_name() const override { return name; }
3988
3989
0
    size_t get_number_of_arguments() const override { return 2; }
3990
3991
0
    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
3992
0
        return std::make_shared<DataTypeString>();
3993
0
    }
3994
3995
0
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
3996
0
        if (scope != FunctionContext::THREAD_LOCAL) {
3997
0
            return Status::OK();
3998
0
        }
3999
0
        if (!context->is_col_constant(1)) {
4000
0
            return Status::InvalidArgument(
4001
0
                    "character argument to convert function must be constant.");
4002
0
        }
4003
0
        const auto& character_data = context->get_constant_col(1)->column_ptr->get_data_at(0);
4004
0
        if (!iequal(character_data.to_string(), "gbk")) {
4005
0
            return Status::RuntimeError(
4006
0
                    "Illegal second argument column of function convert. now only support "
4007
0
                    "convert to character set of gbk");
4008
0
        }
4009
4010
0
        return Status::OK();
4011
0
    }
4012
4013
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4014
0
                        uint32_t result, size_t input_rows_count) const override {
4015
0
        ColumnPtr argument_column =
4016
0
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
4017
0
        const ColumnString* str_col = static_cast<const ColumnString*>(argument_column.get());
4018
0
        const auto& str_offset = str_col->get_offsets();
4019
0
        const auto& str_chars = str_col->get_chars();
4020
0
        auto col_res = ColumnString::create();
4021
0
        auto& res_offset = col_res->get_offsets();
4022
0
        auto& res_chars = col_res->get_chars();
4023
0
        res_offset.resize(input_rows_count);
4024
        // max pinyin size is 6 + 1 (first '~') for utf8 chinese word 3
4025
0
        size_t pinyin_size = (str_chars.size() + 2) / 3 * 7;
4026
0
        ColumnString::check_chars_length(pinyin_size, 0);
4027
0
        res_chars.resize(pinyin_size);
4028
4029
0
        size_t in_len = 0, out_len = 0;
4030
0
        for (int i = 0; i < input_rows_count; ++i) {
4031
0
            in_len = str_offset[i] - str_offset[i - 1];
4032
0
            const char* in = reinterpret_cast<const char*>(&str_chars[str_offset[i - 1]]);
4033
0
            char* out = reinterpret_cast<char*>(&res_chars[res_offset[i - 1]]);
4034
0
            _utf8_to_pinyin(in, in_len, out, &out_len);
4035
0
            res_offset[i] = res_offset[i - 1] + out_len;
4036
0
        }
4037
0
        res_chars.resize(res_offset[input_rows_count - 1]);
4038
0
        block.replace_by_position(result, std::move(col_res));
4039
0
        return Status::OK();
4040
0
    }
4041
4042
0
    void _utf8_to_pinyin(const char* in, size_t in_len, char* out, size_t* out_len) const {
4043
0
        auto do_memcpy = [](char*& dest, const char*& from, size_t size) {
4044
0
            memcpy_small_allow_read_write_overflow15(dest, from, size);
4045
0
            dest += size;
4046
0
            from += size;
4047
0
        };
4048
0
        auto from = in;
4049
0
        auto dest = out;
4050
4051
0
        while (from - in < in_len) {
4052
0
            auto length = get_utf8_byte_length(*from);
4053
0
            if (length != 3) {
4054
0
                do_memcpy(dest, from, length);
4055
0
            } else {
4056
                // convert utf8 to unicode code to get pinyin offset
4057
0
                if (auto tmp = (((int)(*from & 0x0F)) << 12) | (((int)(*(from + 1) & 0x3F)) << 6) |
4058
0
                               (*(from + 2) & 0x3F);
4059
0
                    tmp >= START_UNICODE_OFFSET and tmp < END_UNICODE_OFFSET) {
4060
0
                    const char* buf = nullptr;
4061
0
                    if (tmp >= START_UNICODE_OFFSET && tmp < MID_UNICODE_OFFSET) {
4062
0
                        buf = PINYIN_DICT1 + (tmp - START_UNICODE_OFFSET) * MAX_PINYIN_LEN;
4063
0
                    } else if (tmp >= MID_UNICODE_OFFSET && tmp < END_UNICODE_OFFSET) {
4064
0
                        buf = PINYIN_DICT2 + (tmp - MID_UNICODE_OFFSET) * MAX_PINYIN_LEN;
4065
0
                    }
4066
4067
0
                    auto end = strchr(buf, ' ');
4068
                    // max len for pinyin is 6
4069
0
                    int len = MAX_PINYIN_LEN;
4070
0
                    if (end != nullptr && end - buf < MAX_PINYIN_LEN) {
4071
0
                        len = end - buf;
4072
0
                    }
4073
                    // set first char '~' just make sure all english word lower than chinese word
4074
0
                    *dest = 126;
4075
0
                    memcpy(dest + 1, buf, len);
4076
0
                    dest += (len + 1);
4077
0
                    from += 3;
4078
0
                } else {
4079
0
                    do_memcpy(dest, from, 3);
4080
0
                }
4081
0
            }
4082
0
        }
4083
4084
0
        *out_len = dest - out;
4085
0
    }
4086
};
4087
4088
// refer to https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_char
4089
//      UTF8
4090
// 多  0xe5, 0xa4, 0x9a  0xb6, 0xe0
4091
// 睿  0xe7, 0x9d, 0xbf  0xee, 0xa3
4092
// 丝  0xe4, 0xb8, 0x9d  0xcb, 0xbf 14989469
4093
// MySQL behaviour:
4094
// mysql> select char(0xe4, 0xb8, 0x9d using utf8);
4095
// +-----------------------------------+
4096
// | char(0xe4, 0xb8, 0x9d using utf8) |
4097
// +-----------------------------------+
4098
// | 丝                                |
4099
// +-----------------------------------+
4100
// 1 row in set, 1 warning (0.00 sec)
4101
// mysql> select char(14989469 using utf8);
4102
// +---------------------------+
4103
// | char(14989469 using utf8) |
4104
// +---------------------------+
4105
// | 丝                        |
4106
// +---------------------------+
4107
// 1 row in set, 1 warning (0.00 sec)
4108
// mysql> select char(0xe5, 0xa4, 0x9a, 0xe7, 0x9d, 0xbf, 0xe4, 0xb8, 0x9d, 68, 111, 114, 105, 115 using utf8);
4109
// +---------------------------------------------------------------------------------------------+
4110
// | char(0xe5, 0xa4, 0x9a, 0xe7, 0x9d, 0xbf, 0xe4, 0xb8, 0x9d, 68, 111, 114, 105, 115 using utf8) |
4111
// +---------------------------------------------------------------------------------------------+
4112
// | 多睿丝 Doris                                                                                 |
4113
// +---------------------------------------------------------------------------------------------+
4114
// mysql> select char(68, 111, 114, 0, 105, null, 115 using utf8);
4115
// +--------------------------------------------------+
4116
// | char(68, 111, 114, 0, 105, null, 115 using utf8) |
4117
// +--------------------------------------------------+
4118
// | Dor is                                           |
4119
// +--------------------------------------------------+
4120
4121
// return null:
4122
// mysql>  select char(255 using utf8);
4123
// +----------------------+
4124
// | char(255 using utf8) |
4125
// +----------------------+
4126
// | NULL                 |
4127
// +----------------------+
4128
// 1 row in set, 2 warnings (0.00 sec)
4129
//
4130
// mysql> show warnings;
4131
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4132
// | Level   | Code | Message                                                                                                                                                                     |
4133
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4134
// | Warning | 3719 | 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. |
4135
// | Warning | 1300 | Invalid utf8mb3 character string: 'FF'                                                                                                                                      |
4136
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4137
// 2 rows in set (0.01 sec)
4138
4139
// max int value:
4140
// mysql> select char(18446744073709551615);
4141
// +--------------------------------------------------------+
4142
// | char(18446744073709551615)                             |
4143
// +--------------------------------------------------------+
4144
// | 0xFFFFFFFF                                             |
4145
// +--------------------------------------------------------+
4146
// 1 row in set (0.00 sec)
4147
//
4148
// mysql> select char(18446744073709551616);
4149
// +--------------------------------------------------------+
4150
// | char(18446744073709551616)                             |
4151
// +--------------------------------------------------------+
4152
// | 0xFFFFFFFF                                             |
4153
// +--------------------------------------------------------+
4154
// 1 row in set, 1 warning (0.00 sec)
4155
//
4156
// mysql> show warnings;
4157
// +---------+------+-----------------------------------------------------------+
4158
// | Level   | Code | Message                                                   |
4159
// +---------+------+-----------------------------------------------------------+
4160
// | Warning | 1292 | Truncated incorrect DECIMAL value: '18446744073709551616' |
4161
// +---------+------+-----------------------------------------------------------+
4162
// 1 row in set (0.00 sec)
4163
4164
// table columns:
4165
// mysql> select * from t;
4166
// +------+------+------+
4167
// | f1   | f2   | f3   |
4168
// +------+------+------+
4169
// |  228 |  184 |  157 |
4170
// |  228 |  184 |    0 |
4171
// |  228 |  184 |   99 |
4172
// |   99 |  228 |  184 |
4173
// +------+------+------+
4174
// 4 rows in set (0.00 sec)
4175
//
4176
// mysql> select char(f1, f2, f3 using utf8) from t;
4177
// +-----------------------------+
4178
// | char(f1, f2, f3 using utf8) |
4179
// +-----------------------------+
4180
// | 丝                          |
4181
// |                             |
4182
// |                             |
4183
// | c                           |
4184
// +-----------------------------+
4185
// 4 rows in set, 4 warnings (0.00 sec)
4186
//
4187
// mysql> show warnings;
4188
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4189
// | Level   | Code | Message                                                                                                                                                                     |
4190
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4191
// | Warning | 3719 | 'utf8' is currently an alias for the character set UTF8MB3, but will be an alias for UTF8MB4 in a future release. Please consider using UTF8MB4 in order to be unambiguous. |
4192
// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B800'                                                                                                                                  |
4193
// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B863'                                                                                                                                  |
4194
// | Warning | 1300 | Invalid utf8mb3 character string: 'E4B8'                                                                                                                                    |
4195
// +---------+------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
4196
class FunctionIntToChar : public IFunction {
4197
public:
4198
    static constexpr auto name = "char";
4199
8
    static FunctionPtr create() { return std::make_shared<FunctionIntToChar>(); }
4200
0
    String get_name() const override { return name; }
4201
0
    size_t get_number_of_arguments() const override { return 0; }
4202
1
    bool is_variadic() const override { return true; }
4203
4204
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4205
0
        return make_nullable(std::make_shared<DataTypeString>());
4206
0
    }
4207
0
    bool use_default_implementation_for_nulls() const override { return false; }
4208
4209
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4210
0
                        uint32_t result, size_t input_rows_count) const override {
4211
0
        DCHECK_GE(arguments.size(), 2);
4212
4213
0
        int argument_size = arguments.size();
4214
0
        std::vector<ColumnPtr> str_columns(argument_size - 1);
4215
0
        std::vector<const ColumnString::Offsets*> offsets_list(argument_size - 1);
4216
0
        std::vector<const ColumnString::Chars*> chars_list(argument_size - 1);
4217
4218
        // convert each argument columns to column string and then concat the string columns
4219
0
        for (size_t i = 1; i < argument_size; ++i) {
4220
0
            if (auto const_column = check_and_get_column<const ColumnConst>(
4221
0
                        *block.get_by_position(arguments[i]).column)) {
4222
                // ignore null
4223
0
                if (const_column->only_null()) {
4224
0
                    str_columns[i - 1] = nullptr;
4225
0
                } else {
4226
0
                    auto str_column = ColumnString::create();
4227
0
                    auto& chars = str_column->get_chars();
4228
0
                    auto& offsets = str_column->get_offsets();
4229
0
                    offsets.resize(1);
4230
0
                    const ColumnInt32* int_column;
4231
0
                    if (auto* nullable = check_and_get_column<const ColumnNullable>(
4232
0
                                const_column->get_data_column())) {
4233
0
                        int_column = assert_cast<const ColumnInt32*>(
4234
0
                                nullable->get_nested_column_ptr().get());
4235
0
                    } else {
4236
0
                        int_column =
4237
0
                                assert_cast<const ColumnInt32*>(&const_column->get_data_column());
4238
0
                    }
4239
0
                    int int_val = int_column->get_int(0);
4240
0
                    integer_to_char_(0, &int_val, chars, offsets);
4241
0
                    str_columns[i - 1] =
4242
0
                            ColumnConst::create(std::move(str_column), input_rows_count);
4243
0
                }
4244
0
                offsets_list[i - 1] = nullptr;
4245
0
                chars_list[i - 1] = nullptr;
4246
0
            } else {
4247
0
                auto str_column = ColumnString::create();
4248
0
                auto& chars = str_column->get_chars();
4249
0
                auto& offsets = str_column->get_offsets();
4250
                // data.resize(input_rows_count);
4251
0
                offsets.resize(input_rows_count);
4252
4253
0
                if (auto nullable = check_and_get_column<const ColumnNullable>(
4254
0
                            *block.get_by_position(arguments[i]).column)) {
4255
0
                    const auto* int_data =
4256
0
                            assert_cast<const ColumnInt32*>(nullable->get_nested_column_ptr().get())
4257
0
                                    ->get_data()
4258
0
                                    .data();
4259
0
                    const auto* null_map_data = nullable->get_null_map_data().data();
4260
0
                    for (size_t j = 0; j < input_rows_count; ++j) {
4261
                        // ignore null
4262
0
                        if (null_map_data[j]) {
4263
0
                            offsets[j] = offsets[j - 1];
4264
0
                        } else {
4265
0
                            integer_to_char_(j, int_data + j, chars, offsets);
4266
0
                        }
4267
0
                    }
4268
0
                } else {
4269
0
                    const auto* int_data = assert_cast<const ColumnInt32*>(
4270
0
                                                   block.get_by_position(arguments[i]).column.get())
4271
0
                                                   ->get_data()
4272
0
                                                   .data();
4273
0
                    for (size_t j = 0; j < input_rows_count; ++j) {
4274
0
                        integer_to_char_(j, int_data + j, chars, offsets);
4275
0
                    }
4276
0
                }
4277
0
                offsets_list[i - 1] = &str_column->get_offsets();
4278
0
                chars_list[i - 1] = &str_column->get_chars();
4279
0
                str_columns[i - 1] = std::move(str_column);
4280
0
            }
4281
0
        }
4282
4283
0
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
4284
0
        auto res = ColumnString::create();
4285
0
        auto& res_data = res->get_chars();
4286
0
        auto& res_offset = res->get_offsets();
4287
4288
0
        size_t res_reserve_size = 0;
4289
0
        for (size_t i = 0; i < argument_size - 1; ++i) {
4290
0
            if (!str_columns[i]) {
4291
0
                continue;
4292
0
            }
4293
0
            if (auto const_column = check_and_get_column<const ColumnConst>(*str_columns[i])) {
4294
0
                auto str_column =
4295
0
                        assert_cast<const ColumnString*>(&(const_column->get_data_column()));
4296
0
                auto& offsets = str_column->get_offsets();
4297
0
                res_reserve_size += (offsets[0] - offsets[-1]) * input_rows_count;
4298
0
            } else {
4299
0
                for (size_t j = 0; j < input_rows_count; ++j) {
4300
0
                    size_t append = (*offsets_list[i])[j] - (*offsets_list[i])[j - 1];
4301
                    // check whether the output might overflow(unlikely)
4302
0
                    if (UNLIKELY(UINT_MAX - append < res_reserve_size)) {
4303
0
                        return Status::BufferAllocFailed(
4304
0
                                "function char output is too large to allocate");
4305
0
                    }
4306
0
                    res_reserve_size += append;
4307
0
                }
4308
0
            }
4309
0
        }
4310
0
        if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) {
4311
0
            return Status::BufferAllocFailed("function char output is too large to allocate");
4312
0
        }
4313
0
        ColumnString::check_chars_length(res_reserve_size, 0);
4314
0
        res_data.resize(res_reserve_size);
4315
0
        res_offset.resize(input_rows_count);
4316
4317
0
        for (size_t i = 0; i < input_rows_count; ++i) {
4318
0
            int current_length = 0;
4319
0
            for (size_t j = 0; j < argument_size - 1; ++j) {
4320
0
                if (!str_columns[j]) {
4321
0
                    continue;
4322
0
                }
4323
0
                if (auto const_column = check_and_get_column<const ColumnConst>(*str_columns[j])) {
4324
0
                    auto str_column = assert_cast<const ColumnString*, TypeCheckOnRelease::DISABLE>(
4325
0
                            &(const_column->get_data_column()));
4326
0
                    auto data_item = str_column->get_data_at(0);
4327
0
                    memcpy_small_allow_read_write_overflow15(
4328
0
                            &res_data[res_offset[i - 1]] + current_length, data_item.data,
4329
0
                            data_item.size);
4330
0
                    current_length += data_item.size;
4331
0
                } else {
4332
0
                    auto& current_offsets = *offsets_list[j];
4333
0
                    auto& current_chars = *chars_list[j];
4334
4335
0
                    int size = current_offsets[i] - current_offsets[i - 1];
4336
0
                    if (size > 0) {
4337
0
                        memcpy_small_allow_read_write_overflow15(
4338
0
                                &res_data[res_offset[i - 1]] + current_length,
4339
0
                                &current_chars[current_offsets[i - 1]], size);
4340
0
                        current_length += size;
4341
0
                    }
4342
0
                }
4343
0
            }
4344
0
            res_offset[i] = res_offset[i - 1] + current_length;
4345
0
        }
4346
4347
        // validate utf8
4348
0
        auto* null_map_data = null_map->get_data().data();
4349
0
        for (size_t i = 0; i < input_rows_count; ++i) {
4350
0
            if (!validate_utf8((const char*)(&res_data[res_offset[i - 1]]),
4351
0
                               res_offset[i] - res_offset[i - 1])) {
4352
0
                null_map_data[i] = 1;
4353
0
            }
4354
0
        }
4355
4356
0
        block.get_by_position(result).column =
4357
0
                ColumnNullable::create(std::move(res), std::move(null_map));
4358
0
        return Status::OK();
4359
0
    }
4360
4361
private:
4362
    void integer_to_char_(int line_num, const int* num, ColumnString::Chars& chars,
4363
0
                          IColumn::Offsets& offsets) const {
4364
0
        if (0 == *num) {
4365
0
            chars.push_back('\0');
4366
0
            offsets[line_num] = offsets[line_num - 1] + 1;
4367
0
            return;
4368
0
        }
4369
0
        const char* bytes = (const char*)(num);
4370
0
        if constexpr (std::endian::native == std::endian::little) {
4371
0
            int k = 3;
4372
0
            for (; k >= 0; --k) {
4373
0
                if (bytes[k]) {
4374
0
                    break;
4375
0
                }
4376
0
            }
4377
0
            offsets[line_num] = offsets[line_num - 1] + k + 1;
4378
0
            for (; k >= 0; --k) {
4379
0
                chars.push_back(bytes[k] ? bytes[k] : '\0');
4380
0
            }
4381
        } else if constexpr (std::endian::native == std::endian::big) {
4382
            int k = 0;
4383
            for (; k < 4; ++k) {
4384
                if (bytes[k]) {
4385
                    break;
4386
                }
4387
            }
4388
            offsets[line_num] = offsets[line_num - 1] + 4 - k;
4389
            for (; k < 4; ++k) {
4390
                chars.push_back(bytes[k] ? bytes[k] : '\0');
4391
            }
4392
        } else {
4393
            static_assert(std::endian::native == std::endian::big ||
4394
                                  std::endian::native == std::endian::little,
4395
                          "Unsupported endianness");
4396
        }
4397
0
    }
4398
};
4399
4400
class FunctionOverlay : public IFunction {
4401
public:
4402
    static constexpr auto name = "overlay";
4403
25
    static FunctionPtr create() { return std::make_shared<FunctionOverlay>(); }
4404
1
    String get_name() const override { return name; }
4405
17
    size_t get_number_of_arguments() const override { return 4; }
4406
4407
17
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4408
17
        return std::make_shared<DataTypeString>();
4409
17
    }
4410
4411
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4412
13
                        uint32_t result, size_t input_rows_count) const override {
4413
13
        DCHECK_EQ(arguments.size(), 4);
4414
4415
13
        bool col_const[4];
4416
13
        ColumnPtr argument_columns[4];
4417
65
        for (int i = 0; i < 4; ++i) {
4418
52
            std::tie(argument_columns[i], col_const[i]) =
4419
52
                    unpack_if_const(block.get_by_position(arguments[i]).column);
4420
52
        }
4421
4422
13
        const auto* col_origin = assert_cast<const ColumnString*>(argument_columns[0].get());
4423
4424
13
        const auto* col_pos =
4425
13
                assert_cast<const ColumnInt32*>(argument_columns[1].get())->get_data().data();
4426
13
        const auto* col_len =
4427
13
                assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_data().data();
4428
13
        const auto* col_insert = assert_cast<const ColumnString*>(argument_columns[3].get());
4429
4430
13
        ColumnString::MutablePtr col_res = ColumnString::create();
4431
4432
        // if all input string is ascii, we can use ascii function to handle it
4433
13
        const bool is_all_ascii = col_origin->is_ascii() && col_insert->is_ascii();
4434
13
        std::visit(
4435
13
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4436
13
                    if (is_all_ascii) {
4437
6
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4438
6
                                col_origin, col_pos, col_len, col_insert, col_res,
4439
6
                                input_rows_count);
4440
7
                    } else {
4441
7
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4442
7
                                col_origin, col_pos, col_len, col_insert, col_res,
4443
7
                                input_rows_count);
4444
7
                    }
4445
13
                },
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SH_EEDaSA_SB_SC_SD_
Line
Count
Source
4435
1
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4436
1
                    if (is_all_ascii) {
4437
0
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4438
0
                                col_origin, col_pos, col_len, col_insert, col_res,
4439
0
                                input_rows_count);
4440
1
                    } else {
4441
1
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4442
1
                                col_origin, col_pos, col_len, col_insert, col_res,
4443
1
                                input_rows_count);
4444
1
                    }
4445
1
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SH_SG_IbLb1EEEEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESH_SG_IbLb1EESI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESH_SI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SH_EEDaSA_SB_SC_SD_
_ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb0EESG_IbLb1EESI_SI_EEDaSA_SB_SC_SD_
Line
Count
Source
4435
12
                [&](auto origin_const, auto pos_const, auto len_const, auto insert_const) {
4436
12
                    if (is_all_ascii) {
4437
6
                        vector_ascii<origin_const, pos_const, len_const, insert_const>(
4438
6
                                col_origin, col_pos, col_len, col_insert, col_res,
4439
6
                                input_rows_count);
4440
6
                    } else {
4441
6
                        vector_utf8<origin_const, pos_const, len_const, insert_const>(
4442
6
                                col_origin, col_pos, col_len, col_insert, col_res,
4443
6
                                input_rows_count);
4444
6
                    }
4445
12
                },
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESI_SH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESG_IbLb0EESH_SH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESI_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SG_IbLb0EESH_EEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SG_IbLb0EEEEDaSA_SB_SC_SD_
Unexecuted instantiation: _ZZNK5doris15FunctionOverlay12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjmENKUlT_T0_T1_T2_E_clISt17integral_constantIbLb1EESH_SH_SH_EEDaSA_SB_SC_SD_
4446
13
                make_bool_variant(col_const[0]), make_bool_variant(col_const[1]),
4447
13
                make_bool_variant(col_const[2]), make_bool_variant(col_const[3]));
4448
13
        block.replace_by_position(result, std::move(col_res));
4449
13
        return Status::OK();
4450
13
    }
4451
4452
private:
4453
    template <bool origin_const, bool pos_const, bool len_const, bool insert_const>
4454
    static void vector_ascii(const ColumnString* col_origin, int const* col_pos, int const* col_len,
4455
                             const ColumnString* col_insert, ColumnString::MutablePtr& col_res,
4456
6
                             size_t input_rows_count) {
4457
6
        auto& col_res_chars = col_res->get_chars();
4458
6
        auto& col_res_offsets = col_res->get_offsets();
4459
6
        StringRef origin_str, insert_str;
4460
12
        for (size_t i = 0; i < input_rows_count; i++) {
4461
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4462
            // pos is 1-based index,so we need to minus 1
4463
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4464
6
            const auto len = col_len[index_check_const<len_const>(i)];
4465
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4466
6
            const auto origin_size = origin_str.size;
4467
6
            if (pos >= origin_size || pos < 0) {
4468
                // If pos is not within the length of the string, the original string is returned.
4469
3
                col_res->insert_data(origin_str.data, origin_str.size);
4470
3
                continue;
4471
3
            }
4472
3
            col_res_chars.insert(origin_str.data,
4473
3
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
4474
3
            if (pos + len > origin_size || len < 0) {
4475
1
                col_res_chars.insert(insert_str.begin(),
4476
1
                                     insert_str.end()); // copy all of insert_str.
4477
2
            } else {
4478
2
                col_res_chars.insert(insert_str.begin(),
4479
2
                                     insert_str.end()); // copy all of insert_str.
4480
2
                col_res_chars.insert(
4481
2
                        origin_str.data + pos + len,
4482
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4483
2
            }
4484
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4485
3
            col_res_offsets.push_back(col_res_chars.size());
4486
3
        }
4487
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay12vector_asciiILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4456
6
                             size_t input_rows_count) {
4457
6
        auto& col_res_chars = col_res->get_chars();
4458
6
        auto& col_res_offsets = col_res->get_offsets();
4459
6
        StringRef origin_str, insert_str;
4460
12
        for (size_t i = 0; i < input_rows_count; i++) {
4461
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4462
            // pos is 1-based index,so we need to minus 1
4463
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4464
6
            const auto len = col_len[index_check_const<len_const>(i)];
4465
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4466
6
            const auto origin_size = origin_str.size;
4467
6
            if (pos >= origin_size || pos < 0) {
4468
                // If pos is not within the length of the string, the original string is returned.
4469
3
                col_res->insert_data(origin_str.data, origin_str.size);
4470
3
                continue;
4471
3
            }
4472
3
            col_res_chars.insert(origin_str.data,
4473
3
                                 origin_str.data + pos); // copy origin_str with index 0 to pos - 1
4474
3
            if (pos + len > origin_size || len < 0) {
4475
1
                col_res_chars.insert(insert_str.begin(),
4476
1
                                     insert_str.end()); // copy all of insert_str.
4477
2
            } else {
4478
2
                col_res_chars.insert(insert_str.begin(),
4479
2
                                     insert_str.end()); // copy all of insert_str.
4480
2
                col_res_chars.insert(
4481
2
                        origin_str.data + pos + len,
4482
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4483
2
            }
4484
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4485
3
            col_res_offsets.push_back(col_res_chars.size());
4486
3
        }
4487
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay12vector_asciiILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
4488
4489
    template <bool origin_const, bool pos_const, bool len_const, bool insert_const>
4490
    NO_SANITIZE_UNDEFINED static void vector_utf8(const ColumnString* col_origin,
4491
                                                  int const* col_pos, int const* col_len,
4492
                                                  const ColumnString* col_insert,
4493
                                                  ColumnString::MutablePtr& col_res,
4494
7
                                                  size_t input_rows_count) {
4495
7
        auto& col_res_chars = col_res->get_chars();
4496
7
        auto& col_res_offsets = col_res->get_offsets();
4497
7
        StringRef origin_str, insert_str;
4498
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
4499
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
4500
7
        std::vector<size_t> utf8_origin_offsets;
4501
29
        for (size_t i = 0; i < input_rows_count; i++) {
4502
22
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4503
            // pos is 1-based index,so we need to minus 1
4504
22
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4505
22
            const auto len = col_len[index_check_const<len_const>(i)];
4506
22
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4507
22
            utf8_origin_offsets.clear();
4508
4509
160
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
4510
138
                utf8_origin_offsets.push_back(ni);
4511
138
                char_size = get_utf8_byte_length(origin_str.data[ni]);
4512
138
            }
4513
4514
22
            const size_t utf8_origin_size = utf8_origin_offsets.size();
4515
4516
22
            if (pos >= utf8_origin_size || pos < 0) {
4517
                // If pos is not within the length of the string, the original string is returned.
4518
13
                col_res->insert_data(origin_str.data, origin_str.size);
4519
13
                continue;
4520
13
            }
4521
9
            col_res_chars.insert(
4522
9
                    origin_str.data,
4523
9
                    origin_str.data +
4524
9
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
4525
9
            if (pos + len >= utf8_origin_size || len < 0) {
4526
4
                col_res_chars.insert(insert_str.begin(),
4527
4
                                     insert_str.end()); // copy all of insert_str.
4528
5
            } else {
4529
5
                col_res_chars.insert(insert_str.begin(),
4530
5
                                     insert_str.end()); // copy all of insert_str.
4531
5
                col_res_chars.insert(
4532
5
                        origin_str.data + utf8_origin_offsets[pos + len],
4533
5
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4534
5
            }
4535
9
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4536
9
            col_res_offsets.push_back(col_res_chars.size());
4537
9
        }
4538
7
    }
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4494
1
                                                  size_t input_rows_count) {
4495
1
        auto& col_res_chars = col_res->get_chars();
4496
1
        auto& col_res_offsets = col_res->get_offsets();
4497
1
        StringRef origin_str, insert_str;
4498
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
4499
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
4500
1
        std::vector<size_t> utf8_origin_offsets;
4501
17
        for (size_t i = 0; i < input_rows_count; i++) {
4502
16
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4503
            // pos is 1-based index,so we need to minus 1
4504
16
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4505
16
            const auto len = col_len[index_check_const<len_const>(i)];
4506
16
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4507
16
            utf8_origin_offsets.clear();
4508
4509
116
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
4510
100
                utf8_origin_offsets.push_back(ni);
4511
100
                char_size = get_utf8_byte_length(origin_str.data[ni]);
4512
100
            }
4513
4514
16
            const size_t utf8_origin_size = utf8_origin_offsets.size();
4515
4516
16
            if (pos >= utf8_origin_size || pos < 0) {
4517
                // If pos is not within the length of the string, the original string is returned.
4518
10
                col_res->insert_data(origin_str.data, origin_str.size);
4519
10
                continue;
4520
10
            }
4521
6
            col_res_chars.insert(
4522
6
                    origin_str.data,
4523
6
                    origin_str.data +
4524
6
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
4525
6
            if (pos + len >= utf8_origin_size || len < 0) {
4526
3
                col_res_chars.insert(insert_str.begin(),
4527
3
                                     insert_str.end()); // copy all of insert_str.
4528
3
            } else {
4529
3
                col_res_chars.insert(insert_str.begin(),
4530
3
                                     insert_str.end()); // copy all of insert_str.
4531
3
                col_res_chars.insert(
4532
3
                        origin_str.data + utf8_origin_offsets[pos + len],
4533
3
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4534
3
            }
4535
6
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4536
6
            col_res_offsets.push_back(col_res_chars.size());
4537
6
        }
4538
1
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
_ZN5doris15FunctionOverlay11vector_utf8ILb0ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Line
Count
Source
4494
6
                                                  size_t input_rows_count) {
4495
6
        auto& col_res_chars = col_res->get_chars();
4496
6
        auto& col_res_offsets = col_res->get_offsets();
4497
6
        StringRef origin_str, insert_str;
4498
        // utf8_origin_offsets is used to store the offset of each utf8 character in the original string.
4499
        // for example, if the original string is "丝多a睿", utf8_origin_offsets will be {0, 3, 6, 7}.
4500
6
        std::vector<size_t> utf8_origin_offsets;
4501
12
        for (size_t i = 0; i < input_rows_count; i++) {
4502
6
            origin_str = col_origin->get_data_at(index_check_const<origin_const>(i));
4503
            // pos is 1-based index,so we need to minus 1
4504
6
            const auto pos = col_pos[index_check_const<pos_const>(i)] - 1;
4505
6
            const auto len = col_len[index_check_const<len_const>(i)];
4506
6
            insert_str = col_insert->get_data_at(index_check_const<insert_const>(i));
4507
6
            utf8_origin_offsets.clear();
4508
4509
44
            for (size_t ni = 0, char_size = 0; ni < origin_str.size; ni += char_size) {
4510
38
                utf8_origin_offsets.push_back(ni);
4511
38
                char_size = get_utf8_byte_length(origin_str.data[ni]);
4512
38
            }
4513
4514
6
            const size_t utf8_origin_size = utf8_origin_offsets.size();
4515
4516
6
            if (pos >= utf8_origin_size || pos < 0) {
4517
                // If pos is not within the length of the string, the original string is returned.
4518
3
                col_res->insert_data(origin_str.data, origin_str.size);
4519
3
                continue;
4520
3
            }
4521
3
            col_res_chars.insert(
4522
3
                    origin_str.data,
4523
3
                    origin_str.data +
4524
3
                            utf8_origin_offsets[pos]); // copy origin_str with index 0 to pos - 1
4525
3
            if (pos + len >= utf8_origin_size || len < 0) {
4526
1
                col_res_chars.insert(insert_str.begin(),
4527
1
                                     insert_str.end()); // copy all of insert_str.
4528
2
            } else {
4529
2
                col_res_chars.insert(insert_str.begin(),
4530
2
                                     insert_str.end()); // copy all of insert_str.
4531
2
                col_res_chars.insert(
4532
2
                        origin_str.data + utf8_origin_offsets[pos + len],
4533
2
                        origin_str.end()); // copy origin_str from pos+len-1 to the end of the line.
4534
2
            }
4535
3
            ColumnString::check_chars_length(col_res_chars.size(), col_res_offsets.size());
4536
3
            col_res_offsets.push_back(col_res_chars.size());
4537
3
        }
4538
6
    }
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb0ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb0ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb0EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
Unexecuted instantiation: _ZN5doris15FunctionOverlay11vector_utf8ILb1ELb1ELb1ELb1EEEvPKNS_9ColumnStrIjEEPKiS7_S5_RNS_3COWINS_7IColumnEE11mutable_ptrIS3_EEm
4539
};
4540
4541
class FunctionNgramSearch : public IFunction {
4542
public:
4543
    static constexpr auto name = "ngram_search";
4544
8
    static FunctionPtr create() { return std::make_shared<FunctionNgramSearch>(); }
4545
1
    String get_name() const override { return name; }
4546
0
    size_t get_number_of_arguments() const override { return 3; }
4547
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4548
0
        return std::make_shared<DataTypeFloat64>();
4549
0
    }
4550
4551
    // ngram_search(text,pattern,gram_num)
4552
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4553
0
                        uint32_t result, size_t input_rows_count) const override {
4554
0
        CHECK_EQ(arguments.size(), 3);
4555
0
        auto col_res = ColumnFloat64::create();
4556
0
        bool col_const[3];
4557
0
        ColumnPtr argument_columns[3];
4558
0
        for (int i = 0; i < 3; ++i) {
4559
0
            std::tie(argument_columns[i], col_const[i]) =
4560
0
                    unpack_if_const(block.get_by_position(arguments[i]).column);
4561
0
        }
4562
        // There is no need to check if the 2-th,3-th parameters are const here because fe has already checked them.
4563
0
        auto pattern = assert_cast<const ColumnString*>(argument_columns[1].get())->get_data_at(0);
4564
0
        auto gram_num = assert_cast<const ColumnInt32*>(argument_columns[2].get())->get_element(0);
4565
0
        const auto* text_col = assert_cast<const ColumnString*>(argument_columns[0].get());
4566
4567
0
        if (col_const[0]) {
4568
0
            _execute_impl<true>(text_col, pattern, gram_num, *col_res, input_rows_count);
4569
0
        } else {
4570
0
            _execute_impl<false>(text_col, pattern, gram_num, *col_res, input_rows_count);
4571
0
        }
4572
4573
0
        block.replace_by_position(result, std::move(col_res));
4574
0
        return Status::OK();
4575
0
    }
4576
4577
private:
4578
    using NgramMap = phmap::flat_hash_map<uint32_t, uint8_t>;
4579
    // In the map, the key is the CRC32 hash result of a substring in the string,
4580
    // and the value indicates whether this hash is found in the text or pattern.
4581
    constexpr static auto not_found = 0b00;
4582
    constexpr static auto found_in_pattern = 0b01;
4583
    constexpr static auto found_in_text = 0b10;
4584
    constexpr static auto found_in_pattern_and_text = 0b11;
4585
4586
0
    uint32_t sub_str_hash(const char* data, int32_t length) const {
4587
0
        constexpr static uint32_t seed = 0;
4588
0
        return crc32c::Extend(seed, (const uint8_t*)data, length);
4589
0
    }
4590
4591
    template <bool column_const>
4592
    void _execute_impl(const ColumnString* text_col, StringRef& pattern, int gram_num,
4593
0
                       ColumnFloat64& res, size_t size) const {
4594
0
        auto& res_data = res.get_data();
4595
0
        res_data.resize_fill(size, 0);
4596
        // If the length of the pattern is less than gram_num, return 0.
4597
0
        if (pattern.size < gram_num) {
4598
0
            return;
4599
0
        }
4600
4601
        // Build a map by pattern string, which will be used repeatedly in the following loop.
4602
0
        NgramMap pattern_map;
4603
0
        int pattern_count = get_pattern_set(pattern_map, pattern, gram_num);
4604
        // Each time a loop is executed, the map will be modified, so it needs to be restored afterward.
4605
0
        std::vector<uint32_t> restore_map;
4606
4607
0
        for (int i = 0; i < size; i++) {
4608
0
            auto text = text_col->get_data_at(index_check_const<column_const>(i));
4609
0
            if (text.size < gram_num) {
4610
                // If the length of the text is less than gram_num, return 0.
4611
0
                continue;
4612
0
            }
4613
0
            restore_map.reserve(text.size);
4614
0
            auto [text_count, intersection_count] =
4615
0
                    get_text_set(text, gram_num, pattern_map, restore_map);
4616
4617
            // 2 * |Intersection| / (|text substr set| + |pattern substr set|)
4618
0
            res_data[i] = 2.0 * intersection_count / (text_count + pattern_count);
4619
0
        }
4620
0
    }
Unexecuted instantiation: _ZNK5doris19FunctionNgramSearch13_execute_implILb1EEEvPKNS_9ColumnStrIjEERNS_9StringRefEiRNS_12ColumnVectorILNS_13PrimitiveTypeE9EEEm
Unexecuted instantiation: _ZNK5doris19FunctionNgramSearch13_execute_implILb0EEEvPKNS_9ColumnStrIjEERNS_9StringRefEiRNS_12ColumnVectorILNS_13PrimitiveTypeE9EEEm
4621
4622
0
    size_t get_pattern_set(NgramMap& pattern_map, StringRef& pattern, int gram_num) const {
4623
0
        size_t pattern_count = 0;
4624
0
        for (int i = 0; i + gram_num <= pattern.size; i++) {
4625
0
            uint32_t cur_hash = sub_str_hash(pattern.data + i, gram_num);
4626
0
            if (!pattern_map.contains(cur_hash)) {
4627
0
                pattern_map[cur_hash] = found_in_pattern;
4628
0
                pattern_count++;
4629
0
            }
4630
0
        }
4631
0
        return pattern_count;
4632
0
    }
4633
4634
    std::pair<size_t, size_t> get_text_set(StringRef& text, int gram_num, NgramMap& pattern_map,
4635
0
                                           std::vector<uint32_t>& restore_map) const {
4636
0
        restore_map.clear();
4637
        //intersection_count indicates a substring both in pattern and text.
4638
0
        size_t text_count = 0, intersection_count = 0;
4639
0
        for (int i = 0; i + gram_num <= text.size; i++) {
4640
0
            uint32_t cur_hash = sub_str_hash(text.data + i, gram_num);
4641
0
            auto& val = pattern_map[cur_hash];
4642
0
            if (val == not_found) {
4643
0
                val ^= found_in_text;
4644
0
                DCHECK(val == found_in_text);
4645
                // only found in text
4646
0
                text_count++;
4647
0
                restore_map.push_back(cur_hash);
4648
0
            } else if (val == found_in_pattern) {
4649
0
                val ^= found_in_text;
4650
0
                DCHECK(val == found_in_pattern_and_text);
4651
                // found in text and pattern
4652
0
                text_count++;
4653
0
                intersection_count++;
4654
0
                restore_map.push_back(cur_hash);
4655
0
            }
4656
0
        }
4657
        // Restore the pattern_map.
4658
0
        for (auto& restore_hash : restore_map) {
4659
0
            pattern_map[restore_hash] ^= found_in_text;
4660
0
        }
4661
4662
0
        return {text_count, intersection_count};
4663
0
    }
4664
};
4665
4666
class FunctionTranslate : public IFunction {
4667
public:
4668
    static constexpr auto name = "translate";
4669
    using AsciiMap = std::array<UInt8, 128>;
4670
    constexpr static UInt8 DELETE_CHAR = 255; // 255 means delete this char
4671
8
    static FunctionPtr create() { return std::make_shared<FunctionTranslate>(); }
4672
1
    String get_name() const override { return name; }
4673
0
    size_t get_number_of_arguments() const override { return 3; }
4674
4675
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4676
0
        return std::make_shared<DataTypeString>();
4677
0
    };
4678
4679
7
    DataTypes get_variadic_argument_types_impl() const override {
4680
7
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>(),
4681
7
                std::make_shared<DataTypeString>()};
4682
7
    }
4683
4684
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4685
0
                        uint32_t result, size_t input_rows_count) const override {
4686
0
        CHECK_EQ(arguments.size(), 3);
4687
0
        auto col_res = ColumnString::create();
4688
0
        bool col_const[3];
4689
0
        ColumnPtr argument_columns[3];
4690
0
        for (int i = 0; i < 3; ++i) {
4691
0
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
4692
0
        }
4693
0
        argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>(
4694
0
                                                     *block.get_by_position(arguments[0]).column)
4695
0
                                                     .convert_to_full_column()
4696
0
                                           : block.get_by_position(arguments[0]).column;
4697
0
        default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments);
4698
4699
0
        const auto* col_source = assert_cast<const ColumnString*>(argument_columns[0].get());
4700
0
        const auto* col_from = assert_cast<const ColumnString*>(argument_columns[1].get());
4701
0
        const auto* col_to = assert_cast<const ColumnString*>(argument_columns[2].get());
4702
4703
0
        bool is_ascii = col_source->is_ascii() && col_from->is_ascii() && col_to->is_ascii();
4704
0
        auto impl_vectors = impl_vectors_utf8<false>;
4705
0
        if (col_const[1] && col_const[2] && is_ascii) {
4706
0
            impl_vectors = impl_vectors_ascii<true>;
4707
0
        } else if (col_const[1] && col_const[2]) {
4708
0
            impl_vectors = impl_vectors_utf8<true>;
4709
0
        } else if (is_ascii) {
4710
0
            impl_vectors = impl_vectors_ascii<false>;
4711
0
        }
4712
0
        impl_vectors(col_source, col_from, col_to, col_res.get());
4713
0
        block.get_by_position(result).column = std::move(col_res);
4714
0
        return Status::OK();
4715
0
    }
4716
4717
private:
4718
    template <bool IsConst>
4719
    static void impl_vectors_ascii(const ColumnString* col_source, const ColumnString* col_from,
4720
0
                                   const ColumnString* col_to, ColumnString* col_res) {
4721
0
        auto& res_chars = col_res->get_chars();
4722
0
        auto& res_offsets = col_res->get_offsets();
4723
0
        res_chars.reserve(col_source->get_chars().size());
4724
0
        res_offsets.reserve(col_source->get_offsets().size());
4725
0
        DCHECK_EQ(col_res->size(), 0);
4726
0
        AsciiMap map;
4727
0
        if (IsConst) {
4728
0
            const auto& from_str = col_from->get_data_at(0);
4729
0
            const auto& to_str = col_to->get_data_at(0);
4730
0
            if (!build_translate_map_ascii(map, from_str, to_str)) {
4731
                // if the map is not need delete char, we can directly copy the source string,then use map to translate
4732
0
                res_offsets.insert(col_source->get_offsets().begin(),
4733
0
                                   col_source->get_offsets().end());
4734
0
                res_chars.insert(col_source->get_chars().begin(), col_source->get_chars().end());
4735
0
                for (int i = 0; i < res_chars.size(); ++i) {
4736
0
                    res_chars[i] = map[res_chars[i]]; // translate the chars
4737
0
                }
4738
0
                return; // no need to translate
4739
0
            }
4740
0
        }
4741
4742
0
        auto res_size = 0;
4743
0
        auto* begin_data = col_res->get_chars().data();
4744
0
        for (size_t i = 0; i < col_source->size(); ++i) {
4745
0
            const auto& source_str = col_source->get_data_at(i);
4746
0
            if (!IsConst) {
4747
0
                const auto& from_str = col_from->get_data_at(i);
4748
0
                const auto& to_str = col_to->get_data_at(i);
4749
0
                build_translate_map_ascii(map, from_str, to_str);
4750
0
            }
4751
0
            auto* dst_data = begin_data + res_size;
4752
0
            res_size += translate_ascii(source_str, map, dst_data);
4753
4754
0
            res_offsets.push_back(res_size);
4755
0
        }
4756
0
        DCHECK_GE(res_chars.capacity(), res_size);
4757
0
        res_chars.resize(res_size);
4758
0
    }
Unexecuted instantiation: _ZN5doris17FunctionTranslate18impl_vectors_asciiILb1EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
Unexecuted instantiation: _ZN5doris17FunctionTranslate18impl_vectors_asciiILb0EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
4759
4760
    // return true if no need delete char
4761
    bool static build_translate_map_ascii(AsciiMap& map, const StringRef& from_str,
4762
0
                                          const StringRef& to_str) {
4763
0
        for (size_t i = 0; i < map.size(); ++i) {
4764
0
            map[i] = i; // initialize map to identity
4765
0
        }
4766
0
        std::array<UInt8, 128> set_map {0};
4767
0
        const auto min_size = std::min(from_str.size, to_str.size);
4768
        // all ascii characters are in the range [0, 127]
4769
0
        for (size_t i = 0; i < min_size; ++i) {
4770
0
            auto from_char = from_str.data[i];
4771
0
            auto to_char = to_str.data[i];
4772
0
            if (set_map[from_char] == 0) {
4773
0
                set_map[from_char] = 1;
4774
0
                map[from_char] = to_char;
4775
0
            }
4776
0
        }
4777
4778
0
        bool need_delete_char = false;
4779
4780
0
        for (size_t i = min_size; i < from_str.size; ++i) {
4781
0
            auto from_char = from_str.data[i];
4782
0
            if (set_map[from_char] == 0) {
4783
0
                set_map[from_char] = 1;
4784
0
                map[from_char] = DELETE_CHAR; // delete this char
4785
0
                need_delete_char = true;
4786
0
            }
4787
0
        }
4788
0
        return need_delete_char;
4789
0
    }
4790
4791
0
    static size_t translate_ascii(const StringRef& source_str, AsciiMap& map, UInt8* dst_data) {
4792
0
        auto* begin_data = dst_data;
4793
0
        for (size_t i = 0; i < source_str.size; ++i) {
4794
0
            auto c = source_str.data[i];
4795
0
            if (map[c] == DELETE_CHAR) {
4796
0
                continue; // delete this char
4797
0
            }
4798
0
            *dst_data++ = map[c];
4799
0
        }
4800
0
        return dst_data - begin_data;
4801
0
    }
4802
4803
    template <bool IsConst>
4804
    static void impl_vectors_utf8(const ColumnString* col_source, const ColumnString* col_from,
4805
0
                                  const ColumnString* col_to, ColumnString* col_res) {
4806
0
        col_res->get_chars().reserve(col_source->get_chars().size());
4807
0
        col_res->get_offsets().reserve(col_source->get_offsets().size());
4808
0
        std::unordered_map<std::string_view, std::string_view> translate_map;
4809
0
        if (IsConst) {
4810
0
            const auto& from_str = col_from->get_data_at(0);
4811
0
            const auto& to_str = col_to->get_data_at(0);
4812
0
            translate_map =
4813
0
                    build_translate_map_utf8(from_str.to_string_view(), to_str.to_string_view());
4814
0
        }
4815
0
        for (size_t i = 0; i < col_source->size(); ++i) {
4816
0
            const auto& source_str = col_source->get_data_at(i);
4817
0
            if (!IsConst) {
4818
0
                const auto& from_str = col_from->get_data_at(i);
4819
0
                const auto& to_str = col_to->get_data_at(i);
4820
0
                translate_map = build_translate_map_utf8(from_str.to_string_view(),
4821
0
                                                         to_str.to_string_view());
4822
0
            }
4823
0
            auto translated_str = translate_utf8(source_str.to_string_view(), translate_map);
4824
0
            col_res->insert_data(translated_str.data(), translated_str.size());
4825
0
        }
4826
0
    }
Unexecuted instantiation: _ZN5doris17FunctionTranslate17impl_vectors_utf8ILb0EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
Unexecuted instantiation: _ZN5doris17FunctionTranslate17impl_vectors_utf8ILb1EEEvPKNS_9ColumnStrIjEES5_S5_PS3_
4827
4828
    static std::unordered_map<std::string_view, std::string_view> build_translate_map_utf8(
4829
0
            const std::string_view& from_str, const std::string_view& to_str) {
4830
0
        std::unordered_map<std::string_view, std::string_view> translate_map;
4831
0
        for (size_t i = 0, from_char_size = 0, j = 0, to_char_size = 0; i < from_str.size();
4832
0
             i += from_char_size, j += to_char_size) {
4833
0
            from_char_size = get_utf8_byte_length(from_str[i]);
4834
0
            to_char_size = j < to_str.size() ? get_utf8_byte_length(to_str[j]) : 0;
4835
0
            auto from_char = from_str.substr(i, from_char_size);
4836
0
            if (translate_map.find(from_char) == translate_map.end()) {
4837
0
                translate_map[from_char] =
4838
0
                        j < to_str.size() ? to_str.substr(j, to_char_size) : std::string_view();
4839
0
            }
4840
0
        }
4841
0
        return translate_map;
4842
0
    }
4843
4844
    static std::string translate_utf8(
4845
            const std::string_view& source_str,
4846
0
            std::unordered_map<std::string_view, std::string_view>& translate_map) {
4847
0
        std::string result;
4848
0
        result.reserve(source_str.size());
4849
0
        for (size_t i = 0, char_size = 0; i < source_str.size(); i += char_size) {
4850
0
            char_size = get_utf8_byte_length(source_str[i]);
4851
0
            auto c = source_str.substr(i, char_size);
4852
0
            if (translate_map.find(c) != translate_map.end()) {
4853
0
                if (!translate_map[c].empty()) {
4854
0
                    result.append(translate_map[c]);
4855
0
                }
4856
0
            } else {
4857
0
                result.append(c);
4858
0
            }
4859
0
        }
4860
0
        return result;
4861
0
    }
4862
};
4863
4864
/// xpath_string(xml, xpath) -> String
4865
/// Returns the text content of the first node that matches the XPath expression.
4866
/// Returns NULL if either xml or xpath is NULL.
4867
/// Returns empty string if the XPath expression matches no nodes.
4868
/// The text content includes the node and all its descendants.
4869
/// Example:
4870
///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[1]') = 'b1'
4871
///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/b[2]') = 'b2'
4872
///   xpath_string('<a><b>b1</b><b>b2</b></a>', '/a/c') = ''
4873
///   xpath_string('invalid xml', '/a/b[1]') -> error (InvalidArgument: failed to parse XML string)
4874
///   xpath_string(NULL, '/a/b[1]') = NULL
4875
///   xpath_string('<a><b>b1</b><b>b2</b></a>', NULL) = NULL
4876
class FunctionXPathString : public IFunction {
4877
public:
4878
    static constexpr auto name = "xpath_string";
4879
84
    static FunctionPtr create() { return std::make_shared<FunctionXPathString>(); }
4880
1
    String get_name() const override { return name; }
4881
76
    size_t get_number_of_arguments() const override { return 2; }
4882
76
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
4883
76
        return make_nullable(std::make_shared<DataTypeString>());
4884
76
    }
4885
4886
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
4887
67
                        uint32_t result, size_t input_rows_count) const override {
4888
67
        CHECK_EQ(arguments.size(), 2);
4889
67
        auto col_res = ColumnNullable::create(ColumnString::create(), ColumnUInt8::create());
4890
67
        const auto& [left_col, left_const] =
4891
67
                unpack_if_const(block.get_by_position(arguments[0]).column);
4892
67
        const auto& [right_col, right_const] =
4893
67
                unpack_if_const(block.get_by_position(arguments[1]).column);
4894
67
        const auto& xml_col = *assert_cast<const ColumnString*>(left_col.get());
4895
67
        const auto& xpath_col = *assert_cast<const ColumnString*>(right_col.get());
4896
4897
67
        Status status;
4898
67
        if (left_const && right_const) {
4899
0
            status = execute_vector<true, true>(input_rows_count, xml_col, xpath_col, *col_res);
4900
67
        } else if (left_const) {
4901
22
            status = execute_vector<true, false>(input_rows_count, xml_col, xpath_col, *col_res);
4902
45
        } else if (right_const) {
4903
22
            status = execute_vector<false, true>(input_rows_count, xml_col, xpath_col, *col_res);
4904
23
        } else {
4905
23
            status = execute_vector<false, false>(input_rows_count, xml_col, xpath_col, *col_res);
4906
23
        }
4907
67
        if (!status.ok()) {
4908
0
            return status;
4909
0
        }
4910
4911
67
        block.get_by_position(result).column = std::move(col_res);
4912
67
        return Status::OK();
4913
67
    }
4914
4915
private:
4916
81
    static Status parse_xml(const StringRef& xml_str, pugi::xml_document& xml_doc) {
4917
81
        pugi::xml_parse_result result = xml_doc.load_buffer(xml_str.data, xml_str.size);
4918
81
        if (!result) {
4919
0
            return Status::InvalidArgument("Function {} failed to parse XML string: {}", name,
4920
0
                                           result.description());
4921
0
        }
4922
81
        return Status::OK();
4923
81
    }
4924
4925
84
    static Status build_xpath_query(const StringRef& xpath_str, pugi::xpath_query& xpath_query) {
4926
        // xpath_query will throws xpath_exception on compilation errors.
4927
84
        try {
4928
            // NOTE!!!: don't use to_string_view(), because xpath_str maybe not null-terminated
4929
84
            xpath_query = pugi::xpath_query(xpath_str.to_string().c_str());
4930
84
        } catch (const pugi::xpath_exception& e) {
4931
0
            return Status::InvalidArgument("Function {} failed to build XPath query: {}", name,
4932
0
                                           e.what());
4933
0
        }
4934
84
        return Status::OK();
4935
84
    }
4936
4937
    template <bool left_const, bool right_const>
4938
    static Status execute_vector(const size_t input_rows_count, const ColumnString& xml_col,
4939
67
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
4940
67
        pugi::xml_document xml_doc;
4941
67
        pugi::xpath_query xpath_query;
4942
        // first check right_const, because we want to check empty input first
4943
67
        if constexpr (right_const) {
4944
22
            auto xpath_str = xpath_col.get_data_at(0);
4945
22
            if (xpath_str.empty()) {
4946
                // should return null if xpath_str is empty
4947
1
                res_col.insert_many_defaults(input_rows_count);
4948
1
                return Status::OK();
4949
1
            }
4950
21
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4951
21
        }
4952
22
        if constexpr (left_const) {
4953
22
            auto xml_str = xml_col.get_data_at(0);
4954
22
            if (xml_str.empty()) {
4955
                // should return null if xml_str is empty
4956
1
                res_col.insert_many_defaults(input_rows_count);
4957
1
                return Status::OK();
4958
1
            }
4959
21
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4960
21
        }
4961
4962
156
        for (size_t i = 0; i < input_rows_count; ++i) {
4963
89
            if constexpr (!right_const) {
4964
68
                auto xpath_str = xpath_col.get_data_at(i);
4965
68
                if (xpath_str.empty()) {
4966
                    // should return null if xpath_str is empty
4967
5
                    res_col.insert_default();
4968
5
                    continue;
4969
5
                }
4970
63
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4971
63
            }
4972
68
            if constexpr (!left_const) {
4973
68
                auto xml_str = xml_col.get_data_at(i);
4974
68
                if (xml_str.empty()) {
4975
                    // should return null if xml_str is empty
4976
4
                    res_col.insert_default();
4977
4
                    continue;
4978
4
                }
4979
64
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4980
64
            }
4981
64
            std::string text;
4982
89
            try {
4983
89
                text = xpath_query.evaluate_string(xml_doc);
4984
89
            } catch (const pugi::xpath_exception& e) {
4985
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
4986
0
                                               e.what());
4987
0
            }
4988
80
            res_col.insert_data(text.data(), text.size());
4989
80
        }
4990
67
        return Status::OK();
4991
67
    }
Unexecuted instantiation: _ZN5doris19FunctionXPathString14execute_vectorILb1ELb1EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
_ZN5doris19FunctionXPathString14execute_vectorILb1ELb0EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
Line
Count
Source
4939
22
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
4940
22
        pugi::xml_document xml_doc;
4941
22
        pugi::xpath_query xpath_query;
4942
        // first check right_const, because we want to check empty input first
4943
        if constexpr (right_const) {
4944
            auto xpath_str = xpath_col.get_data_at(0);
4945
            if (xpath_str.empty()) {
4946
                // should return null if xpath_str is empty
4947
                res_col.insert_many_defaults(input_rows_count);
4948
                return Status::OK();
4949
            }
4950
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4951
        }
4952
22
        if constexpr (left_const) {
4953
22
            auto xml_str = xml_col.get_data_at(0);
4954
22
            if (xml_str.empty()) {
4955
                // should return null if xml_str is empty
4956
1
                res_col.insert_many_defaults(input_rows_count);
4957
1
                return Status::OK();
4958
1
            }
4959
21
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4960
21
        }
4961
4962
43
        for (size_t i = 0; i < input_rows_count; ++i) {
4963
21
            if constexpr (!right_const) {
4964
21
                auto xpath_str = xpath_col.get_data_at(i);
4965
21
                if (xpath_str.empty()) {
4966
                    // should return null if xpath_str is empty
4967
1
                    res_col.insert_default();
4968
1
                    continue;
4969
1
                }
4970
20
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4971
20
            }
4972
            if constexpr (!left_const) {
4973
                auto xml_str = xml_col.get_data_at(i);
4974
                if (xml_str.empty()) {
4975
                    // should return null if xml_str is empty
4976
                    res_col.insert_default();
4977
                    continue;
4978
                }
4979
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4980
            }
4981
21
            std::string text;
4982
21
            try {
4983
21
                text = xpath_query.evaluate_string(xml_doc);
4984
21
            } catch (const pugi::xpath_exception& e) {
4985
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
4986
0
                                               e.what());
4987
0
            }
4988
20
            res_col.insert_data(text.data(), text.size());
4989
20
        }
4990
22
        return Status::OK();
4991
22
    }
_ZN5doris19FunctionXPathString14execute_vectorILb0ELb1EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
Line
Count
Source
4939
22
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
4940
22
        pugi::xml_document xml_doc;
4941
22
        pugi::xpath_query xpath_query;
4942
        // first check right_const, because we want to check empty input first
4943
22
        if constexpr (right_const) {
4944
22
            auto xpath_str = xpath_col.get_data_at(0);
4945
22
            if (xpath_str.empty()) {
4946
                // should return null if xpath_str is empty
4947
1
                res_col.insert_many_defaults(input_rows_count);
4948
1
                return Status::OK();
4949
1
            }
4950
21
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4951
21
        }
4952
        if constexpr (left_const) {
4953
            auto xml_str = xml_col.get_data_at(0);
4954
            if (xml_str.empty()) {
4955
                // should return null if xml_str is empty
4956
                res_col.insert_many_defaults(input_rows_count);
4957
                return Status::OK();
4958
            }
4959
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4960
        }
4961
4962
43
        for (size_t i = 0; i < input_rows_count; ++i) {
4963
            if constexpr (!right_const) {
4964
                auto xpath_str = xpath_col.get_data_at(i);
4965
                if (xpath_str.empty()) {
4966
                    // should return null if xpath_str is empty
4967
                    res_col.insert_default();
4968
                    continue;
4969
                }
4970
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4971
            }
4972
21
            if constexpr (!left_const) {
4973
21
                auto xml_str = xml_col.get_data_at(i);
4974
21
                if (xml_str.empty()) {
4975
                    // should return null if xml_str is empty
4976
1
                    res_col.insert_default();
4977
1
                    continue;
4978
1
                }
4979
20
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4980
20
            }
4981
20
            std::string text;
4982
21
            try {
4983
21
                text = xpath_query.evaluate_string(xml_doc);
4984
21
            } catch (const pugi::xpath_exception& e) {
4985
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
4986
0
                                               e.what());
4987
0
            }
4988
20
            res_col.insert_data(text.data(), text.size());
4989
20
        }
4990
22
        return Status::OK();
4991
22
    }
_ZN5doris19FunctionXPathString14execute_vectorILb0ELb0EEENS_6StatusEmRKNS_9ColumnStrIjEES6_RNS_14ColumnNullableE
Line
Count
Source
4939
23
                                 const ColumnString& xpath_col, ColumnNullable& res_col) {
4940
23
        pugi::xml_document xml_doc;
4941
23
        pugi::xpath_query xpath_query;
4942
        // first check right_const, because we want to check empty input first
4943
        if constexpr (right_const) {
4944
            auto xpath_str = xpath_col.get_data_at(0);
4945
            if (xpath_str.empty()) {
4946
                // should return null if xpath_str is empty
4947
                res_col.insert_many_defaults(input_rows_count);
4948
                return Status::OK();
4949
            }
4950
            RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4951
        }
4952
        if constexpr (left_const) {
4953
            auto xml_str = xml_col.get_data_at(0);
4954
            if (xml_str.empty()) {
4955
                // should return null if xml_str is empty
4956
                res_col.insert_many_defaults(input_rows_count);
4957
                return Status::OK();
4958
            }
4959
            RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4960
        }
4961
4962
70
        for (size_t i = 0; i < input_rows_count; ++i) {
4963
47
            if constexpr (!right_const) {
4964
47
                auto xpath_str = xpath_col.get_data_at(i);
4965
47
                if (xpath_str.empty()) {
4966
                    // should return null if xpath_str is empty
4967
4
                    res_col.insert_default();
4968
4
                    continue;
4969
4
                }
4970
43
                RETURN_IF_ERROR(build_xpath_query(xpath_str, xpath_query));
4971
43
            }
4972
47
            if constexpr (!left_const) {
4973
47
                auto xml_str = xml_col.get_data_at(i);
4974
47
                if (xml_str.empty()) {
4975
                    // should return null if xml_str is empty
4976
3
                    res_col.insert_default();
4977
3
                    continue;
4978
3
                }
4979
44
                RETURN_IF_ERROR(parse_xml(xml_str, xml_doc));
4980
44
            }
4981
44
            std::string text;
4982
47
            try {
4983
47
                text = xpath_query.evaluate_string(xml_doc);
4984
47
            } catch (const pugi::xpath_exception& e) {
4985
0
                return Status::InvalidArgument("Function {} failed to query XPath string: {}", name,
4986
0
                                               e.what());
4987
0
            }
4988
40
            res_col.insert_data(text.data(), text.size());
4989
40
        }
4990
23
        return Status::OK();
4991
23
    }
4992
};
4993
4994
class MakeSetImpl {
4995
public:
4996
    static constexpr auto name = "make_set";
4997
4998
0
    static size_t get_number_of_arguments() { return 0; }
4999
1
    static bool is_variadic() { return true; }
5000
0
    static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
5001
0
        if (arguments[0].get()->is_nullable()) {
5002
0
            return make_nullable(std::make_shared<DataTypeString>());
5003
0
        }
5004
0
        return std::make_shared<DataTypeString>();
5005
0
    }
5006
5007
    static bool is_return_nullable(bool has_nullable,
5008
0
                                   const std::vector<ColumnWithConstAndNullMap>& cols_info) {
5009
0
        return cols_info[0].null_map != nullptr;
5010
0
    }
5011
5012
    static bool execute_const_null(ColumnString::MutablePtr& res_col,
5013
                                   PaddedPODArray<UInt8>& res_null_map_data,
5014
0
                                   size_t input_rows_count, size_t null_index) {
5015
0
        if (null_index == 1) {
5016
0
            res_col->insert_many_defaults(input_rows_count);
5017
0
            res_null_map_data.assign(input_rows_count, (UInt8)1);
5018
0
            return true;
5019
0
        }
5020
0
        return false;
5021
0
    }
5022
5023
    static void execute(const std::vector<ColumnWithConstAndNullMap>& column_infos,
5024
                        ColumnString::MutablePtr& res_col, PaddedPODArray<UInt8>& res_null_map_data,
5025
0
                        size_t input_rows_count) {
5026
0
        static constexpr char SEPARATOR = ',';
5027
0
        const auto& bit_data =
5028
0
                assert_cast<const ColumnInt64&>(*column_infos[0].nested_col).get_data();
5029
0
        std::vector<const ColumnString*> str_cols(column_infos.size());
5030
0
        for (size_t i = 1; i < column_infos.size(); ++i) {
5031
0
            str_cols[i] = assert_cast<const ColumnString*>(column_infos[i].nested_col);
5032
0
        }
5033
5034
0
        for (size_t row = 0; row < input_rows_count; ++row) {
5035
0
            if (column_infos[0].is_null_at(row)) {
5036
0
                res_col->insert_default();
5037
0
                res_null_map_data[row] = 1;
5038
0
                continue;
5039
0
            }
5040
5041
0
            uint64_t bit = bit_data[column_infos[0].is_const ? 0 : row];
5042
0
            uint64_t col_pos = __builtin_ffsll(bit);
5043
0
            ColumnString::Chars data;
5044
0
            while (col_pos != 0 && col_pos < column_infos.size() && bit != 0) {
5045
0
                if (!column_infos[col_pos].is_null_at(row)) {
5046
                    /* Here insert `str,` directly to support the case below:
5047
                     * SELECT MAKE_SET(3, '', 'a');
5048
                     * the exception result should be ',a'.
5049
                     */
5050
0
                    auto s_ref = str_cols[col_pos]->get_data_at(
5051
0
                            column_infos[col_pos].is_const ? 0 : row);
5052
0
                    data.insert(s_ref.data, s_ref.data + s_ref.size);
5053
0
                    data.push_back(SEPARATOR);
5054
0
                }
5055
0
                bit &= ~(1ULL << (col_pos - 1));
5056
0
                col_pos = __builtin_ffsll(bit);
5057
0
            }
5058
            // remove the last ','
5059
0
            if (!data.empty()) {
5060
0
                data.pop_back();
5061
0
            }
5062
0
            res_col->insert_data(reinterpret_cast<const char*>(data.data()), data.size());
5063
0
        }
5064
0
    }
5065
};
5066
5067
class FunctionExportSet : public IFunction {
5068
public:
5069
    static constexpr auto name = "export_set";
5070
8
    static FunctionPtr create() { return std::make_shared<FunctionExportSet>(); }
5071
0
    String get_name() const override { return name; }
5072
0
    size_t get_number_of_arguments() const override { return 0; }
5073
1
    bool is_variadic() const override { return true; }
5074
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
5075
0
        return std::make_shared<DataTypeString>();
5076
0
    }
5077
5078
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
5079
0
                        uint32_t result, size_t input_rows_count) const override {
5080
0
        auto res_col = ColumnString::create();
5081
5082
0
        const size_t arg_size = arguments.size();
5083
0
        bool col_const[5];
5084
0
        ColumnPtr arg_cols[5];
5085
0
        bool all_const = true;
5086
0
        for (int i = 1; i < arg_size; ++i) {
5087
0
            col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column);
5088
0
            all_const = all_const && col_const[i];
5089
0
        }
5090
0
        std::tie(arg_cols[0], col_const[0]) =
5091
0
                unpack_if_const(block.get_by_position(arguments[0]).column);
5092
0
        if (arg_size == 3) {
5093
0
            default_preprocess_parameter_columns(arg_cols, col_const, {1, 2}, block, arguments);
5094
0
        } else if (arg_size == 4) {
5095
0
            default_preprocess_parameter_columns(arg_cols, col_const, {1, 2, 3}, block, arguments);
5096
0
        } else if (arg_size == 5) {
5097
0
            default_preprocess_parameter_columns(arg_cols, col_const, {1, 2, 3, 4}, block,
5098
0
                                                 arguments);
5099
0
        }
5100
5101
0
        const auto* bit_col = assert_cast<const ColumnInt128*>(arg_cols[0].get());
5102
0
        const auto* on_col = assert_cast<const ColumnString*>(arg_cols[1].get());
5103
0
        const auto* off_col = assert_cast<const ColumnString*>(arg_cols[2].get());
5104
0
        const ColumnString* sep_col = nullptr;
5105
0
        const ColumnInt32* num_bits_col = nullptr;
5106
0
        if (arg_size > 3) {
5107
0
            sep_col = assert_cast<const ColumnString*>(arg_cols[3].get());
5108
0
            if (arg_size == 5) {
5109
0
                num_bits_col = assert_cast<const ColumnInt32*>(arg_cols[4].get());
5110
0
            }
5111
0
        }
5112
5113
0
        for (size_t i = 0; i < input_rows_count; ++i) {
5114
0
            uint64_t bit =
5115
0
                    check_and_get_bit(bit_col->get_element(index_check_const(i, col_const[0])));
5116
5117
0
            size_t idx_for_args = all_const ? 0 : i;
5118
0
            StringRef on = on_col->get_data_at(idx_for_args);
5119
0
            StringRef off = off_col->get_data_at(idx_for_args);
5120
0
            StringRef separator(",", 1);
5121
0
            int8_t num_of_bits = 64;
5122
5123
0
            if (arg_size > 3) {
5124
0
                separator = sep_col->get_data_at(idx_for_args);
5125
0
                if (arg_size == 5) {
5126
0
                    num_of_bits =
5127
0
                            check_and_get_num_of_bits(num_bits_col->get_element(idx_for_args));
5128
0
                }
5129
0
            }
5130
5131
0
            execute_single(bit, on, off, separator, num_of_bits, *res_col);
5132
0
        }
5133
0
        block.replace_by_position(result, std::move(res_col));
5134
0
        return Status::OK();
5135
0
    }
5136
5137
private:
5138
    /* The valid range of the input `bit` parameter should be [-2^63, 2^64 - 1]
5139
     * If it exceeds this range, the MAX/MIN values of the signed 64-bit integer are used for calculation
5140
     * This behavior is consistent with MySQL.
5141
     */
5142
0
    uint64_t check_and_get_bit(__int128 col_bit_val) const {
5143
0
        if (col_bit_val > ULLONG_MAX) {
5144
0
            return LLONG_MAX;
5145
0
        } else if (col_bit_val < LLONG_MIN) {
5146
0
            return LLONG_MIN;
5147
0
        }
5148
0
        return static_cast<uint64_t>(col_bit_val);
5149
0
    }
5150
5151
    // If the input value is not in the range [0, 64], return default value 64
5152
0
    int8_t check_and_get_num_of_bits(int32_t col_num_of_bits_val) const {
5153
0
        if (col_num_of_bits_val >= 0 && col_num_of_bits_val <= 64) {
5154
0
            return static_cast<int8_t>(col_num_of_bits_val);
5155
0
        }
5156
0
        return 64;
5157
0
    }
5158
5159
    void execute_single(uint64_t bit, const StringRef& on, const StringRef& off,
5160
                        const StringRef& separator, int8_t num_of_bits,
5161
0
                        ColumnString& res_col) const {
5162
0
        ColumnString::Chars data;
5163
0
        data.reserve(std::max(on.size, off.size) * num_of_bits +
5164
0
                     separator.size * (num_of_bits - 1));
5165
5166
0
        while (bit && num_of_bits) {
5167
0
            if (bit & 1) {
5168
0
                data.insert(on.data, on.data + on.size);
5169
0
            } else {
5170
0
                data.insert(off.data, off.data + off.size);
5171
0
            }
5172
0
            bit >>= 1;
5173
0
            if (--num_of_bits) {
5174
0
                data.insert(separator.data, separator.data + separator.size);
5175
0
            }
5176
0
        }
5177
5178
0
        if (num_of_bits > 0) {
5179
0
            ColumnString::Chars off_sep_combo;
5180
0
            off_sep_combo.reserve(separator.size + off.size);
5181
0
            off_sep_combo.insert(off_sep_combo.end(), off.data, off.data + off.size);
5182
0
            off_sep_combo.insert(off_sep_combo.end(), separator.data,
5183
0
                                 separator.data + separator.size);
5184
5185
0
            for (size_t i = 0; i < num_of_bits; ++i) {
5186
0
                data.insert(off_sep_combo.data(), off_sep_combo.data() + off_sep_combo.size());
5187
0
            }
5188
0
            data.erase(data.end() - separator.size, data.end());
5189
0
        }
5190
5191
0
        res_col.insert_data(reinterpret_cast<const char*>(data.data()), data.size());
5192
0
    }
5193
};
5194
5195
// ATTN: for debug only
5196
// computes the crc32 hash value the same way as `VOlapTablePartitionParam::find_tablets()`
5197
class FunctionCrc32Internal : public IFunction {
5198
public:
5199
    static constexpr auto name = "crc32_internal";
5200
8
    static FunctionPtr create() { return std::make_shared<FunctionCrc32Internal>(); }
5201
0
    String get_name() const override { return name; }
5202
0
    size_t get_number_of_arguments() const override { return 0; }
5203
1
    bool is_variadic() const override { return true; }
5204
0
    bool use_default_implementation_for_nulls() const override { return false; }
5205
0
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
5206
0
        return std::make_shared<DataTypeInt64>();
5207
0
    }
5208
5209
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
5210
0
                        uint32_t result, size_t input_rows_count) const override {
5211
0
        DCHECK_GE(arguments.size(), 1);
5212
5213
0
        auto argument_size = arguments.size();
5214
0
        std::vector<ColumnPtr> argument_columns(argument_size);
5215
0
        std::vector<PrimitiveType> argument_primitive_types(argument_size);
5216
5217
0
        for (size_t i = 0; i < argument_size; ++i) {
5218
0
            argument_columns[i] =
5219
0
                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
5220
0
            argument_primitive_types[i] =
5221
0
                    block.get_by_position(arguments[i]).type->get_primitive_type();
5222
0
        }
5223
5224
0
        auto res_col = ColumnInt64::create();
5225
0
        auto& res_data = res_col->get_data();
5226
0
        res_data.resize_fill(input_rows_count, 0);
5227
5228
0
        for (size_t i = 0; i < input_rows_count; ++i) {
5229
0
            uint32_t hash_val = 0;
5230
0
            for (size_t j = 0; j < argument_size; ++j) {
5231
0
                const auto& column = argument_columns[j];
5232
0
                auto primitive_type = argument_primitive_types[j];
5233
0
                auto val = column->get_data_at(i);
5234
0
                if (val.data != nullptr) {
5235
0
                    hash_val = RawValue::zlib_crc32(val.data, val.size, primitive_type, hash_val);
5236
0
                } else {
5237
0
                    hash_val = HashUtil::zlib_crc_hash_null(hash_val);
5238
0
                }
5239
0
            }
5240
0
            res_data[i] = hash_val;
5241
0
        }
5242
5243
0
        block.replace_by_position(result, std::move(res_col));
5244
0
        return Status::OK();
5245
0
    }
5246
};
5247
5248
class FunctionUnicodeNormalize : public IFunction {
5249
public:
5250
    static constexpr auto name = "unicode_normalize";
5251
5252
15
    static FunctionPtr create() { return std::make_shared<FunctionUnicodeNormalize>(); }
5253
5254
3
    String get_name() const override { return name; }
5255
5256
7
    size_t get_number_of_arguments() const override { return 2; }
5257
5258
7
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
5259
7
        if (arguments.size() != 2 || !is_string_type(arguments[0]->get_primitive_type()) ||
5260
7
            !is_string_type(arguments[1]->get_primitive_type())) {
5261
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
5262
0
                                   "Illegal type {} and {} of arguments of function {}",
5263
0
                                   arguments[0]->get_name(), arguments[1]->get_name(), get_name());
5264
0
        }
5265
7
        return arguments[0];
5266
7
    }
5267
5268
10
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
5269
5270
12
    Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
5271
12
        if (scope == FunctionContext::THREAD_LOCAL) {
5272
5
            return Status::OK();
5273
5
        }
5274
5275
7
        if (!context->is_col_constant(1)) {
5276
1
            return Status::InvalidArgument(
5277
1
                    "The second argument 'mode' of function {} must be constant", get_name());
5278
1
        }
5279
5280
6
        auto* const_col = context->get_constant_col(1);
5281
6
        auto mode_ref = const_col->column_ptr->get_data_at(0);
5282
6
        std::string lower_mode = doris::to_lower(std::string(doris::trim(mode_ref.to_string())));
5283
5284
6
        UErrorCode status = U_ZERO_ERROR;
5285
6
        const icu::Normalizer2* normalizer = nullptr;
5286
5287
6
        if (lower_mode == "nfc") {
5288
2
            normalizer = icu::Normalizer2::getInstance(nullptr, "nfc", UNORM2_COMPOSE, status);
5289
4
        } else if (lower_mode == "nfd") {
5290
1
            normalizer = icu::Normalizer2::getNFDInstance(status);
5291
3
        } else if (lower_mode == "nfkc") {
5292
0
            normalizer = icu::Normalizer2::getInstance(nullptr, "nfkc", UNORM2_COMPOSE, status);
5293
3
        } else if (lower_mode == "nfkd") {
5294
1
            normalizer = icu::Normalizer2::getNFKDInstance(status);
5295
2
        } else if (lower_mode == "nfkc_cf") {
5296
1
            normalizer = icu::Normalizer2::getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, status);
5297
1
        } else {
5298
1
            return Status::InvalidArgument(
5299
1
                    "Invalid normalization mode '{}' for function {}. "
5300
1
                    "Supported modes: NFC, NFD, NFKC, NFKD, NFKC_CF",
5301
1
                    lower_mode, get_name());
5302
1
        }
5303
5304
5
        if (U_FAILURE(status) || normalizer == nullptr) {
5305
0
            return Status::InvalidArgument(
5306
0
                    "Failed to get normalizer instance for mode '{}' in function {}: {}",
5307
0
                    lower_mode, get_name(), u_errorName(status));
5308
0
        }
5309
5310
5
        auto state = std::make_shared<UnicodeNormalizeState>();
5311
5
        state->normalizer = normalizer;
5312
5
        context->set_function_state(scope, state);
5313
5
        return Status::OK();
5314
5
    }
5315
5316
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
5317
5
                        uint32_t result, size_t input_rows_count) const override {
5318
5
        auto* state = reinterpret_cast<UnicodeNormalizeState*>(
5319
5
                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
5320
5
        if (state == nullptr || state->normalizer == nullptr) {
5321
0
            return Status::RuntimeError("unicode_normalize function state is not initialized");
5322
0
        }
5323
5324
5
        ColumnPtr col =
5325
5
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
5326
5
        const auto* col_str = check_and_get_column<ColumnString>(col.get());
5327
5
        if (col_str == nullptr) {
5328
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
5329
0
                                        block.get_by_position(arguments[0]).column->get_name(),
5330
0
                                        get_name());
5331
0
        }
5332
5333
5
        const auto& data = col_str->get_chars();
5334
5
        const auto& offsets = col_str->get_offsets();
5335
5336
5
        auto res = ColumnString::create();
5337
5
        auto& res_data = res->get_chars();
5338
5
        auto& res_offsets = res->get_offsets();
5339
5340
5
        size_t rows = offsets.size();
5341
5
        res_offsets.resize(rows);
5342
5343
5
        std::string tmp;
5344
10
        for (size_t i = 0; i < rows; ++i) {
5345
5
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
5346
5
            size_t len = offsets[i] - offsets[i - 1];
5347
5348
5
            normalize_one(state->normalizer, begin, len, tmp);
5349
5
            StringOP::push_value_string(tmp, i, res_data, res_offsets);
5350
5
        }
5351
5352
5
        block.replace_by_position(result, std::move(res));
5353
5
        return Status::OK();
5354
5
    }
5355
5356
private:
5357
    struct UnicodeNormalizeState {
5358
        const icu::Normalizer2* normalizer = nullptr;
5359
    };
5360
5361
    static void normalize_one(const icu::Normalizer2* normalizer, const char* input, size_t length,
5362
5
                              std::string& output) {
5363
5
        if (length == 0) {
5364
0
            output.clear();
5365
0
            return;
5366
0
        }
5367
5368
5
        icu::StringPiece sp(input, static_cast<int32_t>(length));
5369
5
        icu::UnicodeString src16 = icu::UnicodeString::fromUTF8(sp);
5370
5371
5
        UErrorCode status = U_ZERO_ERROR;
5372
5
        UNormalizationCheckResult quick = normalizer->quickCheck(src16, status);
5373
5
        if (U_SUCCESS(status) && quick == UNORM_YES) {
5374
2
            output.assign(input, length);
5375
2
            return;
5376
2
        }
5377
5378
3
        icu::UnicodeString result16;
5379
3
        status = U_ZERO_ERROR;
5380
3
        normalizer->normalize(src16, result16, status);
5381
3
        if (U_FAILURE(status)) {
5382
0
            output.assign(input, length);
5383
0
            return;
5384
0
        }
5385
5386
3
        output.clear();
5387
3
        result16.toUTF8String(output);
5388
3
    }
5389
};
5390
5391
#include "common/compile_check_avoid_end.h"
5392
} // namespace doris