Coverage Report

Created: 2026-06-08 06:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <ctype.h>
19
#include <math.h>
20
#include <re2/stringpiece.h>
21
#include <unicode/schriter.h>
22
#include <unicode/uchar.h>
23
#include <unicode/unistr.h>
24
#include <unicode/ustream.h>
25
26
#include <bitset>
27
#include <compare>
28
#include <cstddef>
29
#include <cstdint>
30
#include <limits>
31
#include <optional>
32
#include <string>
33
#include <string_view>
34
35
#include "common/cast_set.h"
36
#include "common/status.h"
37
#include "core/column/column.h"
38
#include "core/column/column_string.h"
39
#include "core/data_type/data_type_nullable.h"
40
#include "core/pod_array_fwd.h"
41
#include "core/string_ref.h"
42
#include "exprs/expr_zonemap_filter.h"
43
#include "exprs/function/function_reverse.h"
44
#include "exprs/function/function_string_concat.h"
45
#include "exprs/function/function_string_format.h"
46
#include "exprs/function/function_string_replace.h"
47
#include "exprs/function/function_string_to_string.h"
48
#include "exprs/function/function_totype.h"
49
#include "exprs/function/simple_function_factory.h"
50
#include "exprs/function/string_hex_util.h"
51
#include "util/string_search.hpp"
52
#include "util/url_coding.h"
53
#include "util/utf8_check.h"
54
55
namespace doris {
56
struct NameStringASCII {
57
    static constexpr auto name = "ascii";
58
};
59
60
struct StringASCII {
61
    using ReturnType = DataTypeInt32;
62
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
63
    using Type = String;
64
    using ReturnColumnType = ColumnInt32;
65
66
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
67
54
                         PaddedPODArray<Int32>& res) {
68
54
        auto size = offsets.size();
69
54
        res.resize(size);
70
152
        for (int i = 0; i < size; ++i) {
71
98
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
72
98
            res[i] = (offsets[i] == offsets[i - 1]) ? 0 : static_cast<uint8_t>(raw_str[0]);
73
98
        }
74
54
        return Status::OK();
75
54
    }
76
};
77
78
struct NameParseDataSize {
79
    static constexpr auto name = "parse_data_size";
80
};
81
82
static const std::map<std::string_view, Int128> UNITS = {
83
        {"B", static_cast<Int128>(1)},        {"kB", static_cast<Int128>(1) << 10},
84
        {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30},
85
        {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50},
86
        {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70},
87
        {"YB", static_cast<Int128>(1) << 80}};
88
89
struct ParseDataSize {
90
    using ReturnType = DataTypeInt128;
91
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
92
    using Type = String;
93
    using ReturnColumnType = ColumnInt128;
94
95
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
96
50
                         PaddedPODArray<Int128>& res) {
97
50
        auto size = offsets.size();
98
50
        res.resize(size);
99
104
        for (int i = 0; i < size; ++i) {
100
54
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
101
54
            int str_size = offsets[i] - offsets[i - 1];
102
54
            res[i] = parse_data_size(std::string_view(raw_str, str_size));
103
54
        }
104
50
        return Status::OK();
105
50
    }
106
107
54
    static Int128 parse_data_size(const std::string_view& dataSize) {
108
54
        int digit_length = 0;
109
230
        for (char c : dataSize) {
110
230
            if (isdigit(c) || c == '.') {
111
178
                digit_length++;
112
178
            } else {
113
52
                break;
114
52
            }
115
230
        }
116
117
54
        if (digit_length == 0) {
118
4
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
119
4
                                   "Invalid Input argument \"{}\" of function parse_data_size",
120
4
                                   dataSize);
121
4
        }
122
        // 123.45MB--->123.45 : MB
123
50
        double value = 0.0;
124
50
        try {
125
50
            value = std::stod(std::string(dataSize.substr(0, digit_length)));
126
50
        } catch (const std::exception& e) {
127
0
            throw doris::Exception(
128
0
                    ErrorCode::INVALID_ARGUMENT,
129
0
                    "Invalid Input argument \"{}\" of function parse_data_size, error: {}",
130
0
                    dataSize, e.what());
131
0
        }
132
50
        auto unit = dataSize.substr(digit_length);
133
50
        auto it = UNITS.find(unit);
134
50
        if (it != UNITS.end()) {
135
47
            return static_cast<__int128>(static_cast<long double>(it->second) * value);
136
47
        } else {
137
3
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
138
3
                                   "Invalid Input argument \"{}\" of function parse_data_size",
139
3
                                   dataSize);
140
3
        }
141
50
    }
142
};
143
144
struct NameQuote {
145
    static constexpr auto name = "quote";
146
};
147
148
struct NameQuoteImpl {
149
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
150
17
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
151
17
        size_t offset_size = offsets.size();
152
17
        ColumnString::Offset pos = 0;
153
17
        res_offsets.resize(offset_size);
154
17
        res_data.resize(data.size() + offset_size * 2);
155
45
        for (int i = 0; i < offset_size; i++) {
156
28
            const unsigned char* raw_str = &data[offsets[i - 1]];
157
28
            ColumnString::Offset size = offsets[i] - offsets[i - 1];
158
28
            res_data[pos] = '\'';
159
28
            std::memcpy(res_data.data() + pos + 1, raw_str, size);
160
28
            res_data[pos + size + 1] = '\'';
161
28
            pos += size + 2;
162
28
            res_offsets[i] = pos;
163
28
        }
164
17
        return Status::OK();
165
17
    }
166
};
167
168
struct NameStringLength {
169
    static constexpr auto name = "length";
170
};
171
172
struct StringLengthImpl {
173
    using ReturnType = DataTypeInt32;
174
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
175
    using Type = String;
176
    using ReturnColumnType = ColumnInt32;
177
178
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
179
9.47k
                         PaddedPODArray<Int32>& res) {
180
9.47k
        auto size = offsets.size();
181
9.47k
        res.resize(size);
182
8.29M
        for (int i = 0; i < size; ++i) {
183
8.28M
            int str_size = offsets[i] - offsets[i - 1];
184
8.28M
            res[i] = str_size;
185
8.28M
        }
186
9.47k
        return Status::OK();
187
9.47k
    }
188
};
189
190
struct NameCrc32 {
191
    static constexpr auto name = "crc32";
192
};
193
194
struct Crc32Impl {
195
    using ReturnType = DataTypeInt64;
196
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
197
    using Type = String;
198
    using ReturnColumnType = ColumnInt64;
199
200
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
201
3
                         PaddedPODArray<Int64>& res) {
202
3
        auto size = offsets.size();
203
3
        res.resize(size);
204
6
        for (int i = 0; i < size; ++i) {
205
3
            res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1],
206
3
                             offsets[i] - offsets[i - 1]);
207
3
        }
208
3
        return Status::OK();
209
3
    }
210
};
211
212
struct NameStringUtf8Length {
213
    static constexpr auto name = "char_length";
214
};
215
216
struct StringUtf8LengthImpl {
217
    using ReturnType = DataTypeInt32;
218
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
219
    using Type = String;
220
    using ReturnColumnType = ColumnInt32;
221
222
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
223
58
                         PaddedPODArray<Int32>& res) {
224
58
        auto size = offsets.size();
225
58
        res.resize(size);
226
164
        for (int i = 0; i < size; ++i) {
227
106
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
228
106
            int str_size = offsets[i] - offsets[i - 1];
229
106
            res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size);
230
106
        }
231
58
        return Status::OK();
232
58
    }
233
};
234
235
struct NameIsValidUTF8 {
236
    static constexpr auto name = "is_valid_utf8";
237
};
238
239
struct IsValidUTF8Impl {
240
    using ReturnType = DataTypeUInt8;
241
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
242
    using Type = String;
243
    using ReturnColumnType = ColumnUInt8;
244
245
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
246
39
                         PaddedPODArray<UInt8>& res) {
247
39
        auto size = offsets.size();
248
39
        res.resize(size);
249
98
        for (size_t i = 0; i < size; ++i) {
250
59
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
251
59
            size_t str_size = offsets[i] - offsets[i - 1];
252
59
            res[i] = validate_utf8(raw_str, str_size) ? 1 : 0;
253
59
        }
254
39
        return Status::OK();
255
39
    }
256
};
257
258
struct NameStartsWith {
259
    static constexpr auto name = "starts_with";
260
};
261
262
struct StartsWithOp {
263
    using ResultDataType = DataTypeUInt8;
264
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
265
266
11.3k
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
267
11.3k
        res = strl.starts_with(strr);
268
11.3k
    }
269
};
270
271
struct NameEndsWith {
272
    static constexpr auto name = "ends_with";
273
};
274
275
struct EndsWithOp {
276
    using ResultDataType = DataTypeUInt8;
277
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
278
279
142
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
280
142
        res = strl.ends_with(strr);
281
142
    }
282
};
283
284
struct NameFindInSet {
285
    static constexpr auto name = "find_in_set";
286
};
287
288
struct FindInSetOp {
289
    using ResultDataType = DataTypeInt32;
290
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
291
170
    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
292
670
        for (const auto& c : strl) {
293
670
            if (c == ',') {
294
21
                res = 0;
295
21
                return;
296
21
            }
297
670
        }
298
299
149
        int32_t token_index = 1;
300
149
        int32_t start = 0;
301
149
        int32_t end;
302
303
253
        do {
304
253
            end = start;
305
            // Position end.
306
1.05k
            while (end < strr.length() && strr[end] != ',') {
307
806
                ++end;
308
806
            }
309
310
253
            if (strl == std::string_view {strr.data() + start, (size_t)end - start}) {
311
93
                res = token_index;
312
93
                return;
313
93
            }
314
315
            // Re-position start and end past ','
316
160
            start = end + 1;
317
160
            ++token_index;
318
160
        } while (start < strr.length());
319
56
        res = 0;
320
56
    }
321
};
322
323
struct NameInstr {
324
    static constexpr auto name = "instr";
325
};
326
327
// LeftDataType and RightDataType are DataTypeString
328
template <typename LeftDataType, typename RightDataType>
329
struct StringInStrImpl {
330
    using ResultDataType = DataTypeInt32;
331
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
332
333
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
334
72
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
335
72
        StringRef lstr_ref(ldata.data, ldata.size);
336
337
72
        auto size = roffsets.size();
338
72
        res.resize(size);
339
144
        for (int i = 0; i < size; ++i) {
340
72
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
341
72
            int r_str_size = roffsets[i] - roffsets[i - 1];
342
343
72
            StringRef rstr_ref(r_raw_str, r_str_size);
344
345
72
            res[i] = execute(lstr_ref, rstr_ref);
346
72
        }
347
348
72
        return Status::OK();
349
72
    }
350
351
    static Status vector_scalar(const ColumnString::Chars& ldata,
352
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
353
86
                                ResultPaddedPODArray& res) {
354
86
        auto size = loffsets.size();
355
86
        res.resize(size);
356
357
86
        if (rdata.size == 0) {
358
12
            std::fill(res.begin(), res.end(), 1);
359
12
            return Status::OK();
360
12
        }
361
362
74
        const UInt8* begin = ldata.data();
363
74
        const UInt8* end = begin + ldata.size();
364
74
        const UInt8* pos = begin;
365
366
        /// Current index in the array of strings.
367
74
        size_t i = 0;
368
74
        std::fill(res.begin(), res.end(), 0);
369
370
74
        StringRef rstr_ref(rdata.data, rdata.size);
371
74
        StringSearch search(&rstr_ref);
372
373
90
        while (pos < end) {
374
            // search return matched substring start offset
375
64
            pos = (UInt8*)search.search((char*)pos, end - pos);
376
64
            if (pos >= end) {
377
48
                break;
378
48
            }
379
380
            /// Determine which index it refers to.
381
            /// begin + value_offsets[i] is the start offset of string at i+1
382
16
            while (begin + loffsets[i] < pos) {
383
0
                ++i;
384
0
            }
385
386
            /// We check that the entry does not pass through the boundaries of strings.
387
16
            if (pos + rdata.size <= begin + loffsets[i]) {
388
16
                int loc = (int)(pos - begin) - loffsets[i - 1];
389
16
                int l_str_size = loffsets[i] - loffsets[i - 1];
390
16
                auto len = std::min(l_str_size, loc);
391
16
                loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len);
392
16
                res[i] = loc + 1;
393
16
            }
394
395
            // move to next string offset
396
16
            pos = begin + loffsets[i];
397
16
            ++i;
398
16
        }
399
400
74
        return Status::OK();
401
86
    }
402
403
    static Status vector_vector(const ColumnString::Chars& ldata,
404
                                const ColumnString::Offsets& loffsets,
405
                                const ColumnString::Chars& rdata,
406
207
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
407
207
        DCHECK_EQ(loffsets.size(), roffsets.size());
408
409
207
        auto size = loffsets.size();
410
207
        res.resize(size);
411
661
        for (int i = 0; i < size; ++i) {
412
454
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
413
454
            int l_str_size = loffsets[i] - loffsets[i - 1];
414
454
            StringRef lstr_ref(l_raw_str, l_str_size);
415
416
454
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
417
454
            int r_str_size = roffsets[i] - roffsets[i - 1];
418
454
            StringRef rstr_ref(r_raw_str, r_str_size);
419
420
454
            res[i] = execute(lstr_ref, rstr_ref);
421
454
        }
422
423
207
        return Status::OK();
424
207
    }
425
426
526
    static int execute(const StringRef& strl, const StringRef& strr) {
427
526
        if (strr.size == 0) {
428
71
            return 1;
429
71
        }
430
431
455
        StringSearch search(&strr);
432
        // Hive returns positions starting from 1.
433
455
        int loc = search.search(&strl);
434
455
        if (loc > 0) {
435
43
            int len = std::min(loc, (int)strl.size);
436
43
            loc = simd::VStringFunctions::get_char_len(strl.data, len);
437
43
        }
438
439
455
        return loc + 1;
440
526
    }
441
};
442
443
// the same impl as instr
444
struct NameLocate {
445
    static constexpr auto name = "locate";
446
};
447
448
// LeftDataType and RightDataType are DataTypeString
449
template <typename LeftDataType, typename RightDataType>
450
struct StringLocateImpl {
451
    using ResultDataType = DataTypeInt32;
452
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
453
454
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
455
38
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
456
38
        return StringInStrImpl<LeftDataType, RightDataType>::vector_scalar(rdata, roffsets, ldata,
457
38
                                                                           res);
458
38
    }
459
460
    static Status vector_scalar(const ColumnString::Chars& ldata,
461
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
462
36
                                ResultPaddedPODArray& res) {
463
36
        return StringInStrImpl<LeftDataType, RightDataType>::scalar_vector(rdata, ldata, loffsets,
464
36
                                                                           res);
465
36
    }
466
467
    static Status vector_vector(const ColumnString::Chars& ldata,
468
                                const ColumnString::Offsets& loffsets,
469
                                const ColumnString::Chars& rdata,
470
126
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
471
126
        return StringInStrImpl<LeftDataType, RightDataType>::vector_vector(rdata, roffsets, ldata,
472
126
                                                                           loffsets, res);
473
126
    }
474
};
475
476
// LeftDataType and RightDataType are DataTypeString
477
template <typename LeftDataType, typename RightDataType, typename OP>
478
struct StringFunctionImpl {
479
    using ResultDataType = typename OP::ResultDataType;
480
    using ResultPaddedPODArray = typename OP::ResultPaddedPODArray;
481
482
    static Status vector_vector(const ColumnString::Chars& ldata,
483
                                const ColumnString::Offsets& loffsets,
484
                                const ColumnString::Chars& rdata,
485
213
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
486
213
        DCHECK_EQ(loffsets.size(), roffsets.size());
487
488
213
        auto size = loffsets.size();
489
213
        res.resize(size);
490
576
        for (int i = 0; i < size; ++i) {
491
363
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
492
363
            int l_str_size = loffsets[i] - loffsets[i - 1];
493
494
363
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
495
363
            int r_str_size = roffsets[i] - roffsets[i - 1];
496
497
363
            std::string_view lview(l_raw_str, l_str_size);
498
363
            std::string_view rview(r_raw_str, r_str_size);
499
500
363
            OP::execute(lview, rview, res[i]);
501
363
        }
502
213
        return Status::OK();
503
213
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
485
88
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
486
88
        DCHECK_EQ(loffsets.size(), roffsets.size());
487
488
88
        auto size = loffsets.size();
489
88
        res.resize(size);
490
215
        for (int i = 0; i < size; ++i) {
491
127
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
492
127
            int l_str_size = loffsets[i] - loffsets[i - 1];
493
494
127
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
495
127
            int r_str_size = roffsets[i] - roffsets[i - 1];
496
497
127
            std::string_view lview(l_raw_str, l_str_size);
498
127
            std::string_view rview(r_raw_str, r_str_size);
499
500
127
            OP::execute(lview, rview, res[i]);
501
127
        }
502
88
        return Status::OK();
503
88
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
485
61
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
486
61
        DCHECK_EQ(loffsets.size(), roffsets.size());
487
488
61
        auto size = loffsets.size();
489
61
        res.resize(size);
490
175
        for (int i = 0; i < size; ++i) {
491
114
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
492
114
            int l_str_size = loffsets[i] - loffsets[i - 1];
493
494
114
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
495
114
            int r_str_size = roffsets[i] - roffsets[i - 1];
496
497
114
            std::string_view lview(l_raw_str, l_str_size);
498
114
            std::string_view rview(r_raw_str, r_str_size);
499
500
114
            OP::execute(lview, rview, res[i]);
501
114
        }
502
61
        return Status::OK();
503
61
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
485
64
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
486
64
        DCHECK_EQ(loffsets.size(), roffsets.size());
487
488
64
        auto size = loffsets.size();
489
64
        res.resize(size);
490
186
        for (int i = 0; i < size; ++i) {
491
122
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
492
122
            int l_str_size = loffsets[i] - loffsets[i - 1];
493
494
122
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
495
122
            int r_str_size = roffsets[i] - roffsets[i - 1];
496
497
122
            std::string_view lview(l_raw_str, l_str_size);
498
122
            std::string_view rview(r_raw_str, r_str_size);
499
500
122
            OP::execute(lview, rview, res[i]);
501
122
        }
502
64
        return Status::OK();
503
64
    }
504
    static Status vector_scalar(const ColumnString::Chars& ldata,
505
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
506
46
                                ResultPaddedPODArray& res) {
507
46
        auto size = loffsets.size();
508
46
        res.resize(size);
509
46
        std::string_view rview(rdata.data, rdata.size);
510
11.3k
        for (int i = 0; i < size; ++i) {
511
11.2k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
512
11.2k
            int l_str_size = loffsets[i] - loffsets[i - 1];
513
11.2k
            std::string_view lview(l_raw_str, l_str_size);
514
515
11.2k
            OP::execute(lview, rview, res[i]);
516
11.2k
        }
517
46
        return Status::OK();
518
46
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
506
16
                                ResultPaddedPODArray& res) {
507
16
        auto size = loffsets.size();
508
16
        res.resize(size);
509
16
        std::string_view rview(rdata.data, rdata.size);
510
11.2k
        for (int i = 0; i < size; ++i) {
511
11.2k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
512
11.2k
            int l_str_size = loffsets[i] - loffsets[i - 1];
513
11.2k
            std::string_view lview(l_raw_str, l_str_size);
514
515
11.2k
            OP::execute(lview, rview, res[i]);
516
11.2k
        }
517
16
        return Status::OK();
518
16
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
506
14
                                ResultPaddedPODArray& res) {
507
14
        auto size = loffsets.size();
508
14
        res.resize(size);
509
14
        std::string_view rview(rdata.data, rdata.size);
510
28
        for (int i = 0; i < size; ++i) {
511
14
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
512
14
            int l_str_size = loffsets[i] - loffsets[i - 1];
513
14
            std::string_view lview(l_raw_str, l_str_size);
514
515
14
            OP::execute(lview, rview, res[i]);
516
14
        }
517
14
        return Status::OK();
518
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
506
16
                                ResultPaddedPODArray& res) {
507
16
        auto size = loffsets.size();
508
16
        res.resize(size);
509
16
        std::string_view rview(rdata.data, rdata.size);
510
32
        for (int i = 0; i < size; ++i) {
511
16
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
512
16
            int l_str_size = loffsets[i] - loffsets[i - 1];
513
16
            std::string_view lview(l_raw_str, l_str_size);
514
515
16
            OP::execute(lview, rview, res[i]);
516
16
        }
517
16
        return Status::OK();
518
16
    }
519
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
520
44
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
521
44
        auto size = roffsets.size();
522
44
        res.resize(size);
523
44
        std::string_view lview(ldata.data, ldata.size);
524
94
        for (int i = 0; i < size; ++i) {
525
50
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
526
50
            int r_str_size = roffsets[i] - roffsets[i - 1];
527
50
            std::string_view rview(r_raw_str, r_str_size);
528
529
50
            OP::execute(lview, rview, res[i]);
530
50
        }
531
44
        return Status::OK();
532
44
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
520
4
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
521
4
        auto size = roffsets.size();
522
4
        res.resize(size);
523
4
        std::string_view lview(ldata.data, ldata.size);
524
8
        for (int i = 0; i < size; ++i) {
525
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
526
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
527
4
            std::string_view rview(r_raw_str, r_str_size);
528
529
4
            OP::execute(lview, rview, res[i]);
530
4
        }
531
4
        return Status::OK();
532
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
520
14
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
521
14
        auto size = roffsets.size();
522
14
        res.resize(size);
523
14
        std::string_view lview(ldata.data, ldata.size);
524
28
        for (int i = 0; i < size; ++i) {
525
14
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
526
14
            int r_str_size = roffsets[i] - roffsets[i - 1];
527
14
            std::string_view rview(r_raw_str, r_str_size);
528
529
14
            OP::execute(lview, rview, res[i]);
530
14
        }
531
14
        return Status::OK();
532
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERNS7_IiLm4096ESA_Lm16ELm15EEE
Line
Count
Source
520
26
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
521
26
        auto size = roffsets.size();
522
26
        res.resize(size);
523
26
        std::string_view lview(ldata.data, ldata.size);
524
58
        for (int i = 0; i < size; ++i) {
525
32
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
526
32
            int r_str_size = roffsets[i] - roffsets[i - 1];
527
32
            std::string_view rview(r_raw_str, r_str_size);
528
529
32
            OP::execute(lview, rview, res[i]);
530
32
        }
531
26
        return Status::OK();
532
26
    }
533
};
534
535
// Tag type carrying the SQL-visible name of the lower() function; also used as a
// template argument to select the lower-casing behaviour.
struct NameToLower {
    static constexpr auto name = "lower";
};

// Tag type carrying the SQL-visible name of the upper() function; also used as a
// template argument to select the upper-casing behaviour.
struct NameToUpper {
    static constexpr auto name = "upper";
};
542
543
template <typename OpName>
544
struct TransferImpl {
545
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
546
323
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
547
323
        size_t offset_size = offsets.size();
548
323
        if (UNLIKELY(!offset_size)) {
549
0
            return Status::OK();
550
0
        }
551
552
323
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
553
323
        res_offsets.resize(offset_size);
554
323
        if (is_ascii) {
555
263
            memcpy_small_allow_read_write_overflow15(
556
263
                    res_offsets.data(), offsets.data(),
557
263
                    offset_size * sizeof(ColumnString::Offsets::value_type));
558
559
263
            size_t data_length = data.size();
560
263
            res_data.resize(data_length);
561
263
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
562
92
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
563
171
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
564
171
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
565
171
            }
566
263
        } else {
567
60
            execute_utf8(data, offsets, res_data, res_offsets);
568
60
        }
569
570
323
        return Status::OK();
571
323
    }
_ZN5doris12TransferImplINS_11NameToLowerEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
546
192
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
547
192
        size_t offset_size = offsets.size();
548
192
        if (UNLIKELY(!offset_size)) {
549
0
            return Status::OK();
550
0
        }
551
552
192
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
553
192
        res_offsets.resize(offset_size);
554
192
        if (is_ascii) {
555
171
            memcpy_small_allow_read_write_overflow15(
556
171
                    res_offsets.data(), offsets.data(),
557
171
                    offset_size * sizeof(ColumnString::Offsets::value_type));
558
559
171
            size_t data_length = data.size();
560
171
            res_data.resize(data_length);
561
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
562
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
563
171
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
564
171
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
565
171
            }
566
171
        } else {
567
21
            execute_utf8(data, offsets, res_data, res_offsets);
568
21
        }
569
570
192
        return Status::OK();
571
192
    }
_ZN5doris12TransferImplINS_11NameToUpperEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
546
131
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
547
131
        size_t offset_size = offsets.size();
548
131
        if (UNLIKELY(!offset_size)) {
549
0
            return Status::OK();
550
0
        }
551
552
131
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
553
131
        res_offsets.resize(offset_size);
554
131
        if (is_ascii) {
555
92
            memcpy_small_allow_read_write_overflow15(
556
92
                    res_offsets.data(), offsets.data(),
557
92
                    offset_size * sizeof(ColumnString::Offsets::value_type));
558
559
92
            size_t data_length = data.size();
560
92
            res_data.resize(data_length);
561
92
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
562
92
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
563
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
564
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
565
            }
566
92
        } else {
567
39
            execute_utf8(data, offsets, res_data, res_offsets);
568
39
        }
569
570
131
        return Status::OK();
571
131
    }
572
573
    static void execute_utf8(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
574
60
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
575
60
        std::string result;
576
198
        for (int64_t i = 0; i < offsets.size(); ++i) {
577
138
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
578
138
            uint32_t size = offsets[i] - offsets[i - 1];
579
580
138
            result.clear();
581
138
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
582
91
                to_upper_utf8(begin, size, result);
583
91
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
584
47
                to_lower_utf8(begin, size, result);
585
47
            }
586
138
            StringOP::push_value_string(result, i, res_data, res_offsets);
587
138
        }
588
60
    }
_ZN5doris12TransferImplINS_11NameToLowerEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
574
21
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
575
21
        std::string result;
576
68
        for (int64_t i = 0; i < offsets.size(); ++i) {
577
47
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
578
47
            uint32_t size = offsets[i] - offsets[i - 1];
579
580
47
            result.clear();
581
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
582
                to_upper_utf8(begin, size, result);
583
47
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
584
47
                to_lower_utf8(begin, size, result);
585
47
            }
586
47
            StringOP::push_value_string(result, i, res_data, res_offsets);
587
47
        }
588
21
    }
_ZN5doris12TransferImplINS_11NameToUpperEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
574
39
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
575
39
        std::string result;
576
130
        for (int64_t i = 0; i < offsets.size(); ++i) {
577
91
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
578
91
            uint32_t size = offsets[i] - offsets[i - 1];
579
580
91
            result.clear();
581
91
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
582
91
                to_upper_utf8(begin, size, result);
583
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
584
                to_lower_utf8(begin, size, result);
585
            }
586
91
            StringOP::push_value_string(result, i, res_data, res_offsets);
587
91
        }
588
39
    }
589
590
91
    static void to_upper_utf8(const char* data, uint32_t size, std::string& result) {
591
91
        icu::StringPiece sp;
592
91
        sp.set(data, size);
593
91
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
594
91
        unicode_str.toUpper();
595
91
        unicode_str.toUTF8String(result);
596
91
    }
597
598
47
    static void to_lower_utf8(const char* data, uint32_t size, std::string& result) {
599
47
        icu::StringPiece sp;
600
47
        sp.set(data, size);
601
47
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
602
47
        unicode_str.toLower();
603
47
        unicode_str.toUTF8String(result);
604
47
    }
605
};
606
607
// Capitalize first letter
608
struct NameToInitcap {
609
    static constexpr auto name = "initcap";
610
};
611
612
struct InitcapImpl {
613
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
614
172
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
615
172
        res_offsets.resize(offsets.size());
616
617
172
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
618
172
        if (is_ascii) {
619
114
            impl_vectors_ascii(data, offsets, res_data, res_offsets);
620
114
        } else {
621
58
            impl_vectors_utf8(data, offsets, res_data, res_offsets);
622
58
        }
623
172
        return Status::OK();
624
172
    }
625
626
    static void impl_vectors_ascii(const ColumnString::Chars& data,
627
                                   const ColumnString::Offsets& offsets,
628
                                   ColumnString::Chars& res_data,
629
114
                                   ColumnString::Offsets& res_offsets) {
630
114
        size_t offset_size = offsets.size();
631
114
        memcpy_small_allow_read_write_overflow15(
632
114
                res_offsets.data(), offsets.data(),
633
114
                offset_size * sizeof(ColumnString::Offsets::value_type));
634
635
114
        size_t data_length = data.size();
636
114
        res_data.resize(data_length);
637
114
        simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
638
639
114
        bool need_capitalize = true;
640
246
        for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) {
641
132
            auto end_index = res_offsets[offset_index];
642
132
            need_capitalize = true;
643
644
1.56k
            for (size_t i = start_index; i < end_index; ++i) {
645
1.43k
                if (!::isalnum(res_data[i])) {
646
216
                    need_capitalize = true;
647
1.21k
                } else if (need_capitalize) {
648
                    /*
649
                    https://en.cppreference.com/w/cpp/string/byte/toupper
650
                    Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. 
651
                    To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char:
652
                    char my_toupper(char ch)
653
                    {
654
                        return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
655
                    }
656
                    */
657
267
                    res_data[i] = static_cast<unsigned char>(::toupper(res_data[i]));
658
267
                    need_capitalize = false;
659
267
                }
660
1.43k
            }
661
662
132
            start_index = end_index;
663
132
        }
664
114
    }
665
666
    static void impl_vectors_utf8(const ColumnString::Chars& data,
667
                                  const ColumnString::Offsets& offsets,
668
                                  ColumnString::Chars& res_data,
669
58
                                  ColumnString::Offsets& res_offsets) {
670
58
        std::string result;
671
123
        for (int64_t i = 0; i < offsets.size(); ++i) {
672
65
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
673
65
            uint32_t size = offsets[i] - offsets[i - 1];
674
65
            result.clear();
675
65
            to_initcap_utf8(begin, size, result);
676
65
            StringOP::push_value_string(result, i, res_data, res_offsets);
677
65
        }
678
58
    }
679
680
65
    static void to_initcap_utf8(const char* data, uint32_t size, std::string& result) {
681
65
        icu::StringPiece sp;
682
65
        sp.set(data, size);
683
65
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
684
65
        unicode_str.toLower();
685
65
        icu::UnicodeString output_str;
686
65
        bool need_capitalize = true;
687
65
        icu::StringCharacterIterator iter(unicode_str);
688
647
        for (UChar32 ch = iter.first32(); ch != icu::CharacterIterator::DONE; ch = iter.next32()) {
689
582
            if (!u_isalnum(ch)) {
690
105
                need_capitalize = true;
691
477
            } else if (need_capitalize) {
692
87
                ch = u_toupper(ch);
693
87
                need_capitalize = false;
694
87
            }
695
582
            output_str.append(ch);
696
582
        }
697
65
        output_str.toUTF8String(result);
698
65
    }
699
};
700
701
struct NameTrim {
702
    static constexpr auto name = "trim";
703
};
704
struct NameLTrim {
705
    static constexpr auto name = "ltrim";
706
};
707
struct NameRTrim {
708
    static constexpr auto name = "rtrim";
709
};
710
struct NameTrimIn {
711
    static constexpr auto name = "trim_in";
712
};
713
struct NameLTrimIn {
714
    static constexpr auto name = "ltrim_in";
715
};
716
struct NameRTrimIn {
717
    static constexpr auto name = "rtrim_in";
718
};
719
template <bool is_ltrim, bool is_rtrim, bool trim_single>
720
struct TrimUtil {
721
    static Status vector(const ColumnString::Chars& str_data,
722
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
723
300
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
724
300
        const size_t offset_size = str_offsets.size();
725
300
        res_offsets.resize(offset_size);
726
300
        res_data.reserve(str_data.size());
727
852
        for (size_t i = 0; i < offset_size; ++i) {
728
552
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
729
552
            const auto* str_end = str_data.data() + str_offsets[i];
730
731
552
            if constexpr (is_ltrim) {
732
335
                str_begin =
733
335
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
734
335
            }
735
552
            if constexpr (is_rtrim) {
736
395
                str_end =
737
395
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
738
395
            }
739
740
552
            res_data.insert_assume_reserved(str_begin, str_end);
741
            // The length of the result of the trim function will never exceed the length of the input.
742
552
            res_offsets[i] = (ColumnString::Offset)res_data.size();
743
552
        }
744
300
        return Status::OK();
745
300
    }
_ZN5doris8TrimUtilILb1ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
723
58
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
724
58
        const size_t offset_size = str_offsets.size();
725
58
        res_offsets.resize(offset_size);
726
58
        res_data.reserve(str_data.size());
727
178
        for (size_t i = 0; i < offset_size; ++i) {
728
120
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
729
120
            const auto* str_end = str_data.data() + str_offsets[i];
730
731
120
            if constexpr (is_ltrim) {
732
120
                str_begin =
733
120
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
734
120
            }
735
120
            if constexpr (is_rtrim) {
736
120
                str_end =
737
120
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
738
120
            }
739
740
120
            res_data.insert_assume_reserved(str_begin, str_end);
741
            // The length of the result of the trim function will never exceed the length of the input.
742
120
            res_offsets[i] = (ColumnString::Offset)res_data.size();
743
120
        }
744
58
        return Status::OK();
745
58
    }
_ZN5doris8TrimUtilILb1ELb0ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
723
52
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
724
52
        const size_t offset_size = str_offsets.size();
725
52
        res_offsets.resize(offset_size);
726
52
        res_data.reserve(str_data.size());
727
148
        for (size_t i = 0; i < offset_size; ++i) {
728
96
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
729
96
            const auto* str_end = str_data.data() + str_offsets[i];
730
731
96
            if constexpr (is_ltrim) {
732
96
                str_begin =
733
96
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
734
96
            }
735
            if constexpr (is_rtrim) {
736
                str_end =
737
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
738
            }
739
740
96
            res_data.insert_assume_reserved(str_begin, str_end);
741
            // The length of the result of the trim function will never exceed the length of the input.
742
96
            res_offsets[i] = (ColumnString::Offset)res_data.size();
743
96
        }
744
52
        return Status::OK();
745
52
    }
_ZN5doris8TrimUtilILb0ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
723
94
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
724
94
        const size_t offset_size = str_offsets.size();
725
94
        res_offsets.resize(offset_size);
726
94
        res_data.reserve(str_data.size());
727
266
        for (size_t i = 0; i < offset_size; ++i) {
728
172
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
729
172
            const auto* str_end = str_data.data() + str_offsets[i];
730
731
            if constexpr (is_ltrim) {
732
                str_begin =
733
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
734
            }
735
172
            if constexpr (is_rtrim) {
736
172
                str_end =
737
172
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
738
172
            }
739
740
172
            res_data.insert_assume_reserved(str_begin, str_end);
741
            // The length of the result of the trim function will never exceed the length of the input.
742
172
            res_offsets[i] = (ColumnString::Offset)res_data.size();
743
172
        }
744
94
        return Status::OK();
745
94
    }
_ZN5doris8TrimUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
723
24
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
724
24
        const size_t offset_size = str_offsets.size();
725
24
        res_offsets.resize(offset_size);
726
24
        res_data.reserve(str_data.size());
727
82
        for (size_t i = 0; i < offset_size; ++i) {
728
58
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
729
58
            const auto* str_end = str_data.data() + str_offsets[i];
730
731
58
            if constexpr (is_ltrim) {
732
58
                str_begin =
733
58
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
734
58
            }
735
58
            if constexpr (is_rtrim) {
736
58
                str_end =
737
58
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
738
58
            }
739
740
58
            res_data.insert_assume_reserved(str_begin, str_end);
741
            // The length of the result of the trim function will never exceed the length of the input.
742
58
            res_offsets[i] = (ColumnString::Offset)res_data.size();
743
58
        }
744
24
        return Status::OK();
745
24
    }
_ZN5doris8TrimUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
723
27
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
724
27
        const size_t offset_size = str_offsets.size();
725
27
        res_offsets.resize(offset_size);
726
27
        res_data.reserve(str_data.size());
727
88
        for (size_t i = 0; i < offset_size; ++i) {
728
61
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
729
61
            const auto* str_end = str_data.data() + str_offsets[i];
730
731
61
            if constexpr (is_ltrim) {
732
61
                str_begin =
733
61
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
734
61
            }
735
            if constexpr (is_rtrim) {
736
                str_end =
737
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
738
            }
739
740
61
            res_data.insert_assume_reserved(str_begin, str_end);
741
            // The length of the result of the trim function will never exceed the length of the input.
742
61
            res_offsets[i] = (ColumnString::Offset)res_data.size();
743
61
        }
744
27
        return Status::OK();
745
27
    }
_ZN5doris8TrimUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
723
45
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
724
45
        const size_t offset_size = str_offsets.size();
725
45
        res_offsets.resize(offset_size);
726
45
        res_data.reserve(str_data.size());
727
90
        for (size_t i = 0; i < offset_size; ++i) {
728
45
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
729
45
            const auto* str_end = str_data.data() + str_offsets[i];
730
731
            if constexpr (is_ltrim) {
732
                str_begin =
733
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
734
            }
735
45
            if constexpr (is_rtrim) {
736
45
                str_end =
737
45
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
738
45
            }
739
740
45
            res_data.insert_assume_reserved(str_begin, str_end);
741
            // The length of the result of the trim function will never exceed the length of the input.
742
45
            res_offsets[i] = (ColumnString::Offset)res_data.size();
743
45
        }
744
45
        return Status::OK();
745
45
    }
746
};
747
template <bool is_ltrim, bool is_rtrim, bool trim_single>
748
struct TrimInUtil {
749
    static Status vector(const ColumnString::Chars& str_data,
750
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
751
121
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
752
121
        const size_t offset_size = str_offsets.size();
753
121
        res_offsets.resize(offset_size);
754
121
        res_data.reserve(str_data.size());
755
121
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
756
121
                         simd::VStringFunctions::is_ascii(StringRef(
757
76
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
758
759
121
        if (all_ascii) {
760
68
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
761
68
        } else {
762
53
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
763
53
        }
764
121
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
751
43
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
752
43
        const size_t offset_size = str_offsets.size();
753
43
        res_offsets.resize(offset_size);
754
43
        res_data.reserve(str_data.size());
755
43
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
756
43
                         simd::VStringFunctions::is_ascii(StringRef(
757
28
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
758
759
43
        if (all_ascii) {
760
24
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
761
24
        } else {
762
19
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
763
19
        }
764
43
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
751
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
752
36
        const size_t offset_size = str_offsets.size();
753
36
        res_offsets.resize(offset_size);
754
36
        res_data.reserve(str_data.size());
755
36
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
756
36
                         simd::VStringFunctions::is_ascii(StringRef(
757
21
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
758
759
36
        if (all_ascii) {
760
19
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
761
19
        } else {
762
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
763
17
        }
764
36
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
751
42
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
752
42
        const size_t offset_size = str_offsets.size();
753
42
        res_offsets.resize(offset_size);
754
42
        res_data.reserve(str_data.size());
755
42
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
756
42
                         simd::VStringFunctions::is_ascii(StringRef(
757
27
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
758
759
42
        if (all_ascii) {
760
25
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
761
25
        } else {
762
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
763
17
        }
764
42
    }
765
766
private:
767
    static Status impl_vectors_ascii(const ColumnString::Chars& str_data,
768
                                     const ColumnString::Offsets& str_offsets,
769
                                     const StringRef& remove_str, ColumnString::Chars& res_data,
770
68
                                     ColumnString::Offsets& res_offsets) {
771
68
        const size_t offset_size = str_offsets.size();
772
68
        std::bitset<128> char_lookup;
773
68
        const char* remove_begin = remove_str.data;
774
68
        const char* remove_end = remove_str.data + remove_str.size;
775
776
251
        while (remove_begin < remove_end) {
777
183
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
778
183
            remove_begin += 1;
779
183
        }
780
781
136
        for (size_t i = 0; i < offset_size; ++i) {
782
68
            const char* str_begin =
783
68
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
784
68
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
785
68
            const char* left_trim_pos = str_begin;
786
68
            const char* right_trim_pos = str_end;
787
788
68
            if constexpr (is_ltrim) {
789
127
                while (left_trim_pos < str_end) {
790
114
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
791
30
                        break;
792
30
                    }
793
84
                    ++left_trim_pos;
794
84
                }
795
43
            }
796
797
68
            if constexpr (is_rtrim) {
798
114
                while (right_trim_pos > left_trim_pos) {
799
100
                    --right_trim_pos;
800
100
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
801
35
                        ++right_trim_pos;
802
35
                        break;
803
35
                    }
804
100
                }
805
49
            }
806
807
68
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
808
            // The length of the result of the trim function will never exceed the length of the input.
809
68
            res_offsets[i] = (ColumnString::Offset)res_data.size();
810
68
        }
811
812
68
        return Status::OK();
813
68
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
770
24
                                     ColumnString::Offsets& res_offsets) {
771
24
        const size_t offset_size = str_offsets.size();
772
24
        std::bitset<128> char_lookup;
773
24
        const char* remove_begin = remove_str.data;
774
24
        const char* remove_end = remove_str.data + remove_str.size;
775
776
86
        while (remove_begin < remove_end) {
777
62
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
778
62
            remove_begin += 1;
779
62
        }
780
781
48
        for (size_t i = 0; i < offset_size; ++i) {
782
24
            const char* str_begin =
783
24
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
784
24
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
785
24
            const char* left_trim_pos = str_begin;
786
24
            const char* right_trim_pos = str_end;
787
788
24
            if constexpr (is_ltrim) {
789
57
                while (left_trim_pos < str_end) {
790
50
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
791
17
                        break;
792
17
                    }
793
33
                    ++left_trim_pos;
794
33
                }
795
24
            }
796
797
24
            if constexpr (is_rtrim) {
798
39
                while (right_trim_pos > left_trim_pos) {
799
32
                    --right_trim_pos;
800
32
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
801
17
                        ++right_trim_pos;
802
17
                        break;
803
17
                    }
804
32
                }
805
24
            }
806
807
24
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
808
            // The length of the result of the trim function will never exceed the length of the input.
809
24
            res_offsets[i] = (ColumnString::Offset)res_data.size();
810
24
        }
811
812
24
        return Status::OK();
813
24
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
770
19
                                     ColumnString::Offsets& res_offsets) {
771
19
        const size_t offset_size = str_offsets.size();
772
19
        std::bitset<128> char_lookup;
773
19
        const char* remove_begin = remove_str.data;
774
19
        const char* remove_end = remove_str.data + remove_str.size;
775
776
73
        while (remove_begin < remove_end) {
777
54
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
778
54
            remove_begin += 1;
779
54
        }
780
781
38
        for (size_t i = 0; i < offset_size; ++i) {
782
19
            const char* str_begin =
783
19
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
784
19
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
785
19
            const char* left_trim_pos = str_begin;
786
19
            const char* right_trim_pos = str_end;
787
788
19
            if constexpr (is_ltrim) {
789
70
                while (left_trim_pos < str_end) {
790
64
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
791
13
                        break;
792
13
                    }
793
51
                    ++left_trim_pos;
794
51
                }
795
19
            }
796
797
            if constexpr (is_rtrim) {
798
                while (right_trim_pos > left_trim_pos) {
799
                    --right_trim_pos;
800
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
801
                        ++right_trim_pos;
802
                        break;
803
                    }
804
                }
805
            }
806
807
19
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
808
            // The length of the result of the trim function will never exceed the length of the input.
809
19
            res_offsets[i] = (ColumnString::Offset)res_data.size();
810
19
        }
811
812
19
        return Status::OK();
813
19
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
770
25
                                     ColumnString::Offsets& res_offsets) {
771
25
        const size_t offset_size = str_offsets.size();
772
25
        std::bitset<128> char_lookup;
773
25
        const char* remove_begin = remove_str.data;
774
25
        const char* remove_end = remove_str.data + remove_str.size;
775
776
92
        while (remove_begin < remove_end) {
777
67
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
778
67
            remove_begin += 1;
779
67
        }
780
781
50
        for (size_t i = 0; i < offset_size; ++i) {
782
25
            const char* str_begin =
783
25
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
784
25
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
785
25
            const char* left_trim_pos = str_begin;
786
25
            const char* right_trim_pos = str_end;
787
788
            if constexpr (is_ltrim) {
789
                while (left_trim_pos < str_end) {
790
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
791
                        break;
792
                    }
793
                    ++left_trim_pos;
794
                }
795
            }
796
797
25
            if constexpr (is_rtrim) {
798
75
                while (right_trim_pos > left_trim_pos) {
799
68
                    --right_trim_pos;
800
68
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
801
18
                        ++right_trim_pos;
802
18
                        break;
803
18
                    }
804
68
                }
805
25
            }
806
807
25
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
808
            // The length of the result of the trim function will never exceed the length of the input.
809
25
            res_offsets[i] = (ColumnString::Offset)res_data.size();
810
25
        }
811
812
25
        return Status::OK();
813
25
    }
814
815
    static Status impl_vectors_utf8(const ColumnString::Chars& str_data,
816
                                    const ColumnString::Offsets& str_offsets,
817
                                    const StringRef& remove_str, ColumnString::Chars& res_data,
818
53
                                    ColumnString::Offsets& res_offsets) {
819
53
        const size_t offset_size = str_offsets.size();
820
53
        res_offsets.resize(offset_size);
821
53
        res_data.reserve(str_data.size());
822
823
53
        std::unordered_set<std::string_view> char_lookup;
824
53
        const char* remove_begin = remove_str.data;
825
53
        const char* remove_end = remove_str.data + remove_str.size;
826
827
240
        while (remove_begin < remove_end) {
828
187
            size_t byte_len, char_len;
829
187
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
830
187
                    remove_begin, remove_end, 1);
831
187
            char_lookup.insert(std::string_view(remove_begin, byte_len));
832
187
            remove_begin += byte_len;
833
187
        }
834
835
140
        for (size_t i = 0; i < offset_size; ++i) {
836
87
            const char* str_begin =
837
87
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
838
87
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
839
87
            const char* left_trim_pos = str_begin;
840
87
            const char* right_trim_pos = str_end;
841
842
87
            if constexpr (is_ltrim) {
843
81
                while (left_trim_pos < str_end) {
844
73
                    size_t byte_len, char_len;
845
73
                    std::tie(byte_len, char_len) =
846
73
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
847
73
                                                                                   str_end, 1);
848
73
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
849
73
                        char_lookup.end()) {
850
52
                        break;
851
52
                    }
852
21
                    left_trim_pos += byte_len;
853
21
                }
854
60
            }
855
856
87
            if constexpr (is_rtrim) {
857
88
                while (right_trim_pos > left_trim_pos) {
858
80
                    const char* prev_char_pos = right_trim_pos;
859
156
                    do {
860
156
                        --prev_char_pos;
861
156
                    } while ((*prev_char_pos & 0xC0) == 0x80);
862
80
                    size_t byte_len = right_trim_pos - prev_char_pos;
863
80
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
864
80
                        char_lookup.end()) {
865
52
                        break;
866
52
                    }
867
28
                    right_trim_pos = prev_char_pos;
868
28
                }
869
60
            }
870
871
87
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
872
            // The length of the result of the trim function will never exceed the length of the input.
873
87
            res_offsets[i] = (ColumnString::Offset)res_data.size();
874
87
        }
875
53
        return Status::OK();
876
53
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
818
19
                                    ColumnString::Offsets& res_offsets) {
819
19
        const size_t offset_size = str_offsets.size();
820
19
        res_offsets.resize(offset_size);
821
19
        res_data.reserve(str_data.size());
822
823
19
        std::unordered_set<std::string_view> char_lookup;
824
19
        const char* remove_begin = remove_str.data;
825
19
        const char* remove_end = remove_str.data + remove_str.size;
826
827
84
        while (remove_begin < remove_end) {
828
65
            size_t byte_len, char_len;
829
65
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
830
65
                    remove_begin, remove_end, 1);
831
65
            char_lookup.insert(std::string_view(remove_begin, byte_len));
832
65
            remove_begin += byte_len;
833
65
        }
834
835
52
        for (size_t i = 0; i < offset_size; ++i) {
836
33
            const char* str_begin =
837
33
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
838
33
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
839
33
            const char* left_trim_pos = str_begin;
840
33
            const char* right_trim_pos = str_end;
841
842
33
            if constexpr (is_ltrim) {
843
45
                while (left_trim_pos < str_end) {
844
41
                    size_t byte_len, char_len;
845
41
                    std::tie(byte_len, char_len) =
846
41
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
847
41
                                                                                   str_end, 1);
848
41
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
849
41
                        char_lookup.end()) {
850
29
                        break;
851
29
                    }
852
12
                    left_trim_pos += byte_len;
853
12
                }
854
33
            }
855
856
33
            if constexpr (is_rtrim) {
857
48
                while (right_trim_pos > left_trim_pos) {
858
44
                    const char* prev_char_pos = right_trim_pos;
859
90
                    do {
860
90
                        --prev_char_pos;
861
90
                    } while ((*prev_char_pos & 0xC0) == 0x80);
862
44
                    size_t byte_len = right_trim_pos - prev_char_pos;
863
44
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
864
44
                        char_lookup.end()) {
865
29
                        break;
866
29
                    }
867
15
                    right_trim_pos = prev_char_pos;
868
15
                }
869
33
            }
870
871
33
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
872
            // The length of the result of the trim function will never exceed the length of the input.
873
33
            res_offsets[i] = (ColumnString::Offset)res_data.size();
874
33
        }
875
19
        return Status::OK();
876
19
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
818
17
                                    ColumnString::Offsets& res_offsets) {
819
17
        const size_t offset_size = str_offsets.size();
820
17
        res_offsets.resize(offset_size);
821
17
        res_data.reserve(str_data.size());
822
823
17
        std::unordered_set<std::string_view> char_lookup;
824
17
        const char* remove_begin = remove_str.data;
825
17
        const char* remove_end = remove_str.data + remove_str.size;
826
827
78
        while (remove_begin < remove_end) {
828
61
            size_t byte_len, char_len;
829
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
830
61
                    remove_begin, remove_end, 1);
831
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
832
61
            remove_begin += byte_len;
833
61
        }
834
835
44
        for (size_t i = 0; i < offset_size; ++i) {
836
27
            const char* str_begin =
837
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
838
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
839
27
            const char* left_trim_pos = str_begin;
840
27
            const char* right_trim_pos = str_end;
841
842
27
            if constexpr (is_ltrim) {
843
36
                while (left_trim_pos < str_end) {
844
32
                    size_t byte_len, char_len;
845
32
                    std::tie(byte_len, char_len) =
846
32
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
847
32
                                                                                   str_end, 1);
848
32
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
849
32
                        char_lookup.end()) {
850
23
                        break;
851
23
                    }
852
9
                    left_trim_pos += byte_len;
853
9
                }
854
27
            }
855
856
            if constexpr (is_rtrim) {
857
                while (right_trim_pos > left_trim_pos) {
858
                    const char* prev_char_pos = right_trim_pos;
859
                    do {
860
                        --prev_char_pos;
861
                    } while ((*prev_char_pos & 0xC0) == 0x80);
862
                    size_t byte_len = right_trim_pos - prev_char_pos;
863
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
864
                        char_lookup.end()) {
865
                        break;
866
                    }
867
                    right_trim_pos = prev_char_pos;
868
                }
869
            }
870
871
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
872
            // The length of the result of the trim function will never exceed the length of the input.
873
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
874
27
        }
875
17
        return Status::OK();
876
17
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
818
17
                                    ColumnString::Offsets& res_offsets) {
819
17
        const size_t offset_size = str_offsets.size();
820
17
        res_offsets.resize(offset_size);
821
17
        res_data.reserve(str_data.size());
822
823
17
        std::unordered_set<std::string_view> char_lookup;
824
17
        const char* remove_begin = remove_str.data;
825
17
        const char* remove_end = remove_str.data + remove_str.size;
826
827
78
        while (remove_begin < remove_end) {
828
61
            size_t byte_len, char_len;
829
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
830
61
                    remove_begin, remove_end, 1);
831
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
832
61
            remove_begin += byte_len;
833
61
        }
834
835
44
        for (size_t i = 0; i < offset_size; ++i) {
836
27
            const char* str_begin =
837
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
838
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
839
27
            const char* left_trim_pos = str_begin;
840
27
            const char* right_trim_pos = str_end;
841
842
            if constexpr (is_ltrim) {
843
                while (left_trim_pos < str_end) {
844
                    size_t byte_len, char_len;
845
                    std::tie(byte_len, char_len) =
846
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
847
                                                                                   str_end, 1);
848
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
849
                        char_lookup.end()) {
850
                        break;
851
                    }
852
                    left_trim_pos += byte_len;
853
                }
854
            }
855
856
27
            if constexpr (is_rtrim) {
857
40
                while (right_trim_pos > left_trim_pos) {
858
36
                    const char* prev_char_pos = right_trim_pos;
859
66
                    do {
860
66
                        --prev_char_pos;
861
66
                    } while ((*prev_char_pos & 0xC0) == 0x80);
862
36
                    size_t byte_len = right_trim_pos - prev_char_pos;
863
36
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
864
36
                        char_lookup.end()) {
865
23
                        break;
866
23
                    }
867
13
                    right_trim_pos = prev_char_pos;
868
13
                }
869
27
            }
870
871
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
872
            // The length of the result of the trim function will never exceed the length of the input.
873
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
874
27
        }
875
17
        return Status::OK();
876
17
    }
877
};
878
// This is an implementation of a parameter for the Trim function.
879
template <bool is_ltrim, bool is_rtrim, typename Name>
880
struct Trim1Impl {
881
    static constexpr auto name = Name::name;
882
883
157
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
883
45
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
883
35
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
883
41
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
883
9
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
883
13
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
883
14
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
884
885
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
886
138
                          uint32_t result, size_t input_rows_count) {
887
138
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
138
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
889
138
            auto col_res = ColumnString::create();
890
138
            char blank[] = " ";
891
138
            const StringRef remove_str(blank, 1);
892
138
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
893
138
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
894
138
                    col_res->get_offsets())));
895
138
            block.replace_by_position(result, std::move(col_res));
896
138
        } else {
897
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
898
0
                                        block.get_by_position(arguments[0]).column->get_name(),
899
0
                                        name);
900
0
        }
901
138
        return Status::OK();
902
138
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
48
                          uint32_t result, size_t input_rows_count) {
887
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
889
48
            auto col_res = ColumnString::create();
890
48
            char blank[] = " ";
891
48
            const StringRef remove_str(blank, 1);
892
48
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
893
48
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
894
48
                    col_res->get_offsets())));
895
48
            block.replace_by_position(result, std::move(col_res));
896
48
        } else {
897
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
898
0
                                        block.get_by_position(arguments[0]).column->get_name(),
899
0
                                        name);
900
0
        }
901
48
        return Status::OK();
902
48
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
37
                          uint32_t result, size_t input_rows_count) {
887
37
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
37
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
889
37
            auto col_res = ColumnString::create();
890
37
            char blank[] = " ";
891
37
            const StringRef remove_str(blank, 1);
892
37
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
893
37
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
894
37
                    col_res->get_offsets())));
895
37
            block.replace_by_position(result, std::move(col_res));
896
37
        } else {
897
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
898
0
                                        block.get_by_position(arguments[0]).column->get_name(),
899
0
                                        name);
900
0
        }
901
37
        return Status::OK();
902
37
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
41
                          uint32_t result, size_t input_rows_count) {
887
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
41
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
889
41
            auto col_res = ColumnString::create();
890
41
            char blank[] = " ";
891
41
            const StringRef remove_str(blank, 1);
892
41
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
893
41
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
894
41
                    col_res->get_offsets())));
895
41
            block.replace_by_position(result, std::move(col_res));
896
41
        } else {
897
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
898
0
                                        block.get_by_position(arguments[0]).column->get_name(),
899
0
                                        name);
900
0
        }
901
41
        return Status::OK();
902
41
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
1
                          uint32_t result, size_t input_rows_count) {
887
1
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
1
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
889
1
            auto col_res = ColumnString::create();
890
1
            char blank[] = " ";
891
1
            const StringRef remove_str(blank, 1);
892
1
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
893
1
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
894
1
                    col_res->get_offsets())));
895
1
            block.replace_by_position(result, std::move(col_res));
896
1
        } else {
897
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
898
0
                                        block.get_by_position(arguments[0]).column->get_name(),
899
0
                                        name);
900
0
        }
901
1
        return Status::OK();
902
1
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
5
                          uint32_t result, size_t input_rows_count) {
887
5
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
5
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
889
5
            auto col_res = ColumnString::create();
890
5
            char blank[] = " ";
891
5
            const StringRef remove_str(blank, 1);
892
5
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
893
5
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
894
5
                    col_res->get_offsets())));
895
5
            block.replace_by_position(result, std::move(col_res));
896
5
        } else {
897
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
898
0
                                        block.get_by_position(arguments[0]).column->get_name(),
899
0
                                        name);
900
0
        }
901
5
        return Status::OK();
902
5
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
6
                          uint32_t result, size_t input_rows_count) {
887
6
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
6
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
889
6
            auto col_res = ColumnString::create();
890
6
            char blank[] = " ";
891
6
            const StringRef remove_str(blank, 1);
892
6
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
893
6
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
894
6
                    col_res->get_offsets())));
895
6
            block.replace_by_position(result, std::move(col_res));
896
6
        } else {
897
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
898
0
                                        block.get_by_position(arguments[0]).column->get_name(),
899
0
                                        name);
900
0
        }
901
6
        return Status::OK();
902
6
    }
903
};
904
905
// This is an implementation of two parameters for the Trim function.
906
template <bool is_ltrim, bool is_rtrim, typename Name>
907
struct Trim2Impl {
908
    static constexpr auto name = Name::name;
909
910
226
    static DataTypes get_variadic_argument_types() {
911
226
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
912
226
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
910
20
    static DataTypes get_variadic_argument_types() {
911
20
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
912
20
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
910
29
    static DataTypes get_variadic_argument_types() {
911
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
912
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
910
84
    static DataTypes get_variadic_argument_types() {
911
84
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
912
84
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
910
27
    static DataTypes get_variadic_argument_types() {
911
27
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
912
27
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
910
29
    static DataTypes get_variadic_argument_types() {
911
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
912
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
910
37
    static DataTypes get_variadic_argument_types() {
911
37
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
912
37
    }
913
914
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
915
282
                          uint32_t result, size_t input_rows_count) {
916
282
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
917
282
        const auto& rcol =
918
282
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
919
282
                        ->get_data_column_ptr();
920
282
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
921
282
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
922
282
                auto col_res = ColumnString::create();
923
282
                const auto* remove_str_raw = col_right->get_chars().data();
924
282
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
925
282
                const StringRef remove_str(remove_str_raw, remove_str_size);
926
927
282
                if (remove_str.size == 1) {
928
65
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
929
65
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
930
65
                            col_res->get_offsets())));
931
217
                } else {
932
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
933
                                  std::is_same<Name, NameLTrimIn>::value ||
934
121
                                  std::is_same<Name, NameRTrimIn>::value) {
935
121
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
936
121
                                col->get_chars(), col->get_offsets(), remove_str,
937
121
                                col_res->get_chars(), col_res->get_offsets())));
938
121
                    } else {
939
96
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
940
96
                                col->get_chars(), col->get_offsets(), remove_str,
941
96
                                col_res->get_chars(), col_res->get_offsets())));
942
96
                    }
943
217
                }
944
282
                block.replace_by_position(result, std::move(col_res));
945
282
            } else {
946
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                            block.get_by_position(arguments[1]).column->get_name(),
948
0
                                            name);
949
0
            }
950
951
282
        } else {
952
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
953
0
                                        block.get_by_position(arguments[0]).column->get_name(),
954
0
                                        name);
955
0
        }
956
282
        return Status::OK();
957
282
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
915
26
                          uint32_t result, size_t input_rows_count) {
916
26
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
917
26
        const auto& rcol =
918
26
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
919
26
                        ->get_data_column_ptr();
920
26
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
921
26
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
922
26
                auto col_res = ColumnString::create();
923
26
                const auto* remove_str_raw = col_right->get_chars().data();
924
26
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
925
26
                const StringRef remove_str(remove_str_raw, remove_str_size);
926
927
26
                if (remove_str.size == 1) {
928
2
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
929
2
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
930
2
                            col_res->get_offsets())));
931
24
                } else {
932
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
933
                                  std::is_same<Name, NameLTrimIn>::value ||
934
                                  std::is_same<Name, NameRTrimIn>::value) {
935
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
936
                                col->get_chars(), col->get_offsets(), remove_str,
937
                                col_res->get_chars(), col_res->get_offsets())));
938
24
                    } else {
939
24
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
940
24
                                col->get_chars(), col->get_offsets(), remove_str,
941
24
                                col_res->get_chars(), col_res->get_offsets())));
942
24
                    }
943
24
                }
944
26
                block.replace_by_position(result, std::move(col_res));
945
26
            } else {
946
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                            block.get_by_position(arguments[1]).column->get_name(),
948
0
                                            name);
949
0
            }
950
951
26
        } else {
952
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
953
0
                                        block.get_by_position(arguments[0]).column->get_name(),
954
0
                                        name);
955
0
        }
956
26
        return Status::OK();
957
26
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
915
32
                          uint32_t result, size_t input_rows_count) {
916
32
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
917
32
        const auto& rcol =
918
32
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
919
32
                        ->get_data_column_ptr();
920
32
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
921
32
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
922
32
                auto col_res = ColumnString::create();
923
32
                const auto* remove_str_raw = col_right->get_chars().data();
924
32
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
925
32
                const StringRef remove_str(remove_str_raw, remove_str_size);
926
927
32
                if (remove_str.size == 1) {
928
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
929
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
930
5
                            col_res->get_offsets())));
931
27
                } else {
932
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
933
                                  std::is_same<Name, NameLTrimIn>::value ||
934
                                  std::is_same<Name, NameRTrimIn>::value) {
935
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
936
                                col->get_chars(), col->get_offsets(), remove_str,
937
                                col_res->get_chars(), col_res->get_offsets())));
938
27
                    } else {
939
27
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
940
27
                                col->get_chars(), col->get_offsets(), remove_str,
941
27
                                col_res->get_chars(), col_res->get_offsets())));
942
27
                    }
943
27
                }
944
32
                block.replace_by_position(result, std::move(col_res));
945
32
            } else {
946
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                            block.get_by_position(arguments[1]).column->get_name(),
948
0
                                            name);
949
0
            }
950
951
32
        } else {
952
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
953
0
                                        block.get_by_position(arguments[0]).column->get_name(),
954
0
                                        name);
955
0
        }
956
32
        return Status::OK();
957
32
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
915
85
                          uint32_t result, size_t input_rows_count) {
916
85
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
917
85
        const auto& rcol =
918
85
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
919
85
                        ->get_data_column_ptr();
920
85
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
921
85
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
922
85
                auto col_res = ColumnString::create();
923
85
                const auto* remove_str_raw = col_right->get_chars().data();
924
85
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
925
85
                const StringRef remove_str(remove_str_raw, remove_str_size);
926
927
85
                if (remove_str.size == 1) {
928
40
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
929
40
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
930
40
                            col_res->get_offsets())));
931
45
                } else {
932
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
933
                                  std::is_same<Name, NameLTrimIn>::value ||
934
                                  std::is_same<Name, NameRTrimIn>::value) {
935
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
936
                                col->get_chars(), col->get_offsets(), remove_str,
937
                                col_res->get_chars(), col_res->get_offsets())));
938
45
                    } else {
939
45
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
940
45
                                col->get_chars(), col->get_offsets(), remove_str,
941
45
                                col_res->get_chars(), col_res->get_offsets())));
942
45
                    }
943
45
                }
944
85
                block.replace_by_position(result, std::move(col_res));
945
85
            } else {
946
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                            block.get_by_position(arguments[1]).column->get_name(),
948
0
                                            name);
949
0
            }
950
951
85
        } else {
952
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
953
0
                                        block.get_by_position(arguments[0]).column->get_name(),
954
0
                                        name);
955
0
        }
956
85
        return Status::OK();
957
85
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
915
50
                          uint32_t result, size_t input_rows_count) {
916
50
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
917
50
        const auto& rcol =
918
50
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
919
50
                        ->get_data_column_ptr();
920
50
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
921
50
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
922
50
                auto col_res = ColumnString::create();
923
50
                const auto* remove_str_raw = col_right->get_chars().data();
924
50
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
925
50
                const StringRef remove_str(remove_str_raw, remove_str_size);
926
927
50
                if (remove_str.size == 1) {
928
7
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
929
7
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
930
7
                            col_res->get_offsets())));
931
43
                } else {
932
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
933
                                  std::is_same<Name, NameLTrimIn>::value ||
934
43
                                  std::is_same<Name, NameRTrimIn>::value) {
935
43
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
936
43
                                col->get_chars(), col->get_offsets(), remove_str,
937
43
                                col_res->get_chars(), col_res->get_offsets())));
938
                    } else {
939
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
940
                                col->get_chars(), col->get_offsets(), remove_str,
941
                                col_res->get_chars(), col_res->get_offsets())));
942
                    }
943
43
                }
944
50
                block.replace_by_position(result, std::move(col_res));
945
50
            } else {
946
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                            block.get_by_position(arguments[1]).column->get_name(),
948
0
                                            name);
949
0
            }
950
951
50
        } else {
952
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
953
0
                                        block.get_by_position(arguments[0]).column->get_name(),
954
0
                                        name);
955
0
        }
956
50
        return Status::OK();
957
50
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
915
41
                          uint32_t result, size_t input_rows_count) {
916
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
917
41
        const auto& rcol =
918
41
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
919
41
                        ->get_data_column_ptr();
920
41
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
921
41
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
922
41
                auto col_res = ColumnString::create();
923
41
                const auto* remove_str_raw = col_right->get_chars().data();
924
41
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
925
41
                const StringRef remove_str(remove_str_raw, remove_str_size);
926
927
41
                if (remove_str.size == 1) {
928
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
929
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
930
5
                            col_res->get_offsets())));
931
36
                } else {
932
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
933
                                  std::is_same<Name, NameLTrimIn>::value ||
934
36
                                  std::is_same<Name, NameRTrimIn>::value) {
935
36
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
936
36
                                col->get_chars(), col->get_offsets(), remove_str,
937
36
                                col_res->get_chars(), col_res->get_offsets())));
938
                    } else {
939
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
940
                                col->get_chars(), col->get_offsets(), remove_str,
941
                                col_res->get_chars(), col_res->get_offsets())));
942
                    }
943
36
                }
944
41
                block.replace_by_position(result, std::move(col_res));
945
41
            } else {
946
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                            block.get_by_position(arguments[1]).column->get_name(),
948
0
                                            name);
949
0
            }
950
951
41
        } else {
952
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
953
0
                                        block.get_by_position(arguments[0]).column->get_name(),
954
0
                                        name);
955
0
        }
956
41
        return Status::OK();
957
41
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
915
48
                          uint32_t result, size_t input_rows_count) {
916
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
917
48
        const auto& rcol =
918
48
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
919
48
                        ->get_data_column_ptr();
920
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
921
48
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
922
48
                auto col_res = ColumnString::create();
923
48
                const auto* remove_str_raw = col_right->get_chars().data();
924
48
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
925
48
                const StringRef remove_str(remove_str_raw, remove_str_size);
926
927
48
                if (remove_str.size == 1) {
928
6
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
929
6
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
930
6
                            col_res->get_offsets())));
931
42
                } else {
932
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
933
                                  std::is_same<Name, NameLTrimIn>::value ||
934
42
                                  std::is_same<Name, NameRTrimIn>::value) {
935
42
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
936
42
                                col->get_chars(), col->get_offsets(), remove_str,
937
42
                                col_res->get_chars(), col_res->get_offsets())));
938
                    } else {
939
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
940
                                col->get_chars(), col->get_offsets(), remove_str,
941
                                col_res->get_chars(), col_res->get_offsets())));
942
                    }
943
42
                }
944
48
                block.replace_by_position(result, std::move(col_res));
945
48
            } else {
946
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                            block.get_by_position(arguments[1]).column->get_name(),
948
0
                                            name);
949
0
            }
950
951
48
        } else {
952
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
953
0
                                        block.get_by_position(arguments[0]).column->get_name(),
954
0
                                        name);
955
0
        }
956
48
        return Status::OK();
957
48
    }
958
};
959
960
template <typename impl>
961
class FunctionTrim : public IFunction {
962
public:
963
    static constexpr auto name = impl::name;
964
395
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
964
46
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
964
36
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
964
42
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
964
21
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
964
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
964
85
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
964
10
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
964
14
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
964
15
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
964
28
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
964
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
964
38
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
965
12
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
965
1
    String get_name() const override { return impl::name; }
966
967
287
    size_t get_number_of_arguments() const override {
968
287
        return get_variadic_argument_types_impl().size();
969
287
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
967
37
    size_t get_number_of_arguments() const override {
968
37
        return get_variadic_argument_types_impl().size();
969
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
967
27
    size_t get_number_of_arguments() const override {
968
27
        return get_variadic_argument_types_impl().size();
969
27
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
967
33
    size_t get_number_of_arguments() const override {
968
33
        return get_variadic_argument_types_impl().size();
969
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
967
12
    size_t get_number_of_arguments() const override {
968
12
        return get_variadic_argument_types_impl().size();
969
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
967
21
    size_t get_number_of_arguments() const override {
968
21
        return get_variadic_argument_types_impl().size();
969
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
967
76
    size_t get_number_of_arguments() const override {
968
76
        return get_variadic_argument_types_impl().size();
969
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
967
1
    size_t get_number_of_arguments() const override {
968
1
        return get_variadic_argument_types_impl().size();
969
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
967
5
    size_t get_number_of_arguments() const override {
968
5
        return get_variadic_argument_types_impl().size();
969
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
967
6
    size_t get_number_of_arguments() const override {
968
6
        return get_variadic_argument_types_impl().size();
969
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
967
19
    size_t get_number_of_arguments() const override {
968
19
        return get_variadic_argument_types_impl().size();
969
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
967
21
    size_t get_number_of_arguments() const override {
968
21
        return get_variadic_argument_types_impl().size();
969
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
967
29
    size_t get_number_of_arguments() const override {
968
29
        return get_variadic_argument_types_impl().size();
969
29
    }
970
971
287
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
287
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
287
        return arguments[0];
978
287
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
37
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
37
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
37
        return arguments[0];
978
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
27
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
27
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
27
        return arguments[0];
978
27
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
33
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
33
        return arguments[0];
978
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
12
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
12
        return arguments[0];
978
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
21
        return arguments[0];
978
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
76
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
76
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
76
        return arguments[0];
978
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
1
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
1
        return arguments[0];
978
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
5
        return arguments[0];
978
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
6
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
6
        return arguments[0];
978
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
19
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
19
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
19
        return arguments[0];
978
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
21
        return arguments[0];
978
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
971
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
972
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
973
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
974
0
                                   "Illegal type {} of argument of function {}",
975
0
                                   arguments[0]->get_name(), get_name());
976
0
        }
977
29
        return arguments[0];
978
29
    }
979
    // The second parameter of "trim" is a constant.
980
570
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
85
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
58
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
64
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
37
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
41
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
96
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
1
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
5
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
6
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
67
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
51
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
980
59
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
981
982
383
    DataTypes get_variadic_argument_types_impl() const override {
983
383
        return impl::get_variadic_argument_types();
984
383
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
45
    DataTypes get_variadic_argument_types_impl() const override {
983
45
        return impl::get_variadic_argument_types();
984
45
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
35
    DataTypes get_variadic_argument_types_impl() const override {
983
35
        return impl::get_variadic_argument_types();
984
35
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
41
    DataTypes get_variadic_argument_types_impl() const override {
983
41
        return impl::get_variadic_argument_types();
984
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
20
    DataTypes get_variadic_argument_types_impl() const override {
983
20
        return impl::get_variadic_argument_types();
984
20
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
29
    DataTypes get_variadic_argument_types_impl() const override {
983
29
        return impl::get_variadic_argument_types();
984
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
84
    DataTypes get_variadic_argument_types_impl() const override {
983
84
        return impl::get_variadic_argument_types();
984
84
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
9
    DataTypes get_variadic_argument_types_impl() const override {
983
9
        return impl::get_variadic_argument_types();
984
9
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
13
    DataTypes get_variadic_argument_types_impl() const override {
983
13
        return impl::get_variadic_argument_types();
984
13
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
14
    DataTypes get_variadic_argument_types_impl() const override {
983
14
        return impl::get_variadic_argument_types();
984
14
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
27
    DataTypes get_variadic_argument_types_impl() const override {
983
27
        return impl::get_variadic_argument_types();
984
27
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
29
    DataTypes get_variadic_argument_types_impl() const override {
983
29
        return impl::get_variadic_argument_types();
984
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
982
37
    DataTypes get_variadic_argument_types_impl() const override {
983
37
        return impl::get_variadic_argument_types();
984
37
    }
985
986
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
987
420
                        uint32_t result, size_t input_rows_count) const override {
988
420
        return impl::execute(context, block, arguments, result, input_rows_count);
989
420
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
48
                        uint32_t result, size_t input_rows_count) const override {
988
48
        return impl::execute(context, block, arguments, result, input_rows_count);
989
48
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
37
                        uint32_t result, size_t input_rows_count) const override {
988
37
        return impl::execute(context, block, arguments, result, input_rows_count);
989
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
41
                        uint32_t result, size_t input_rows_count) const override {
988
41
        return impl::execute(context, block, arguments, result, input_rows_count);
989
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
26
                        uint32_t result, size_t input_rows_count) const override {
988
26
        return impl::execute(context, block, arguments, result, input_rows_count);
989
26
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
32
                        uint32_t result, size_t input_rows_count) const override {
988
32
        return impl::execute(context, block, arguments, result, input_rows_count);
989
32
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
85
                        uint32_t result, size_t input_rows_count) const override {
988
85
        return impl::execute(context, block, arguments, result, input_rows_count);
989
85
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
1
                        uint32_t result, size_t input_rows_count) const override {
988
1
        return impl::execute(context, block, arguments, result, input_rows_count);
989
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
5
                        uint32_t result, size_t input_rows_count) const override {
988
5
        return impl::execute(context, block, arguments, result, input_rows_count);
989
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
6
                        uint32_t result, size_t input_rows_count) const override {
988
6
        return impl::execute(context, block, arguments, result, input_rows_count);
989
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
50
                        uint32_t result, size_t input_rows_count) const override {
988
50
        return impl::execute(context, block, arguments, result, input_rows_count);
989
50
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
41
                        uint32_t result, size_t input_rows_count) const override {
988
41
        return impl::execute(context, block, arguments, result, input_rows_count);
989
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
987
48
                        uint32_t result, size_t input_rows_count) const override {
988
48
        return impl::execute(context, block, arguments, result, input_rows_count);
989
48
    }
990
};
991
992
struct UnHexImplEmpty {
993
    static constexpr auto name = "unhex";
994
};
995
996
struct UnHexImplNull {
997
    static constexpr auto name = "unhex_null";
998
};
999
1000
template <typename Name>
1001
struct UnHexImpl {
1002
    static constexpr auto name = Name::name;
1003
    using ReturnType = DataTypeString;
1004
    using ColumnType = ColumnString;
1005
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1006
1007
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1008
160
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1009
160
        auto rows_count = offsets.size();
1010
160
        dst_offsets.resize(rows_count);
1011
1012
160
        int64_t total_size = 0;
1013
368
        for (size_t i = 0; i < rows_count; i++) {
1014
208
            size_t len = offsets[i] - offsets[i - 1];
1015
208
            total_size += len / 2;
1016
208
        }
1017
160
        ColumnString::check_chars_length(total_size, rows_count);
1018
160
        dst_data.resize(total_size);
1019
160
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1020
160
        size_t offset = 0;
1021
1022
368
        for (int i = 0; i < rows_count; ++i) {
1023
208
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1024
208
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1025
1026
208
            if (UNLIKELY(srclen == 0)) {
1027
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1028
13
                continue;
1029
13
            }
1030
1031
195
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1032
1033
195
            offset += outlen;
1034
195
            dst_offsets[i] = cast_set<uint32_t>(offset);
1035
195
        }
1036
160
        dst_data.pop_back(total_size - offset);
1037
160
        return Status::OK();
1038
160
    }
1039
1040
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1041
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1042
33
                         ColumnUInt8::Container* null_map_data) {
1043
33
        auto rows_count = offsets.size();
1044
33
        dst_offsets.resize(rows_count);
1045
1046
33
        int64_t total_size = 0;
1047
84
        for (size_t i = 0; i < rows_count; i++) {
1048
51
            size_t len = offsets[i] - offsets[i - 1];
1049
51
            total_size += len / 2;
1050
51
        }
1051
33
        ColumnString::check_chars_length(total_size, rows_count);
1052
33
        dst_data.resize(total_size);
1053
33
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1054
33
        size_t offset = 0;
1055
1056
84
        for (int i = 0; i < rows_count; ++i) {
1057
51
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1058
51
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1059
1060
51
            if (UNLIKELY(srclen == 0)) {
1061
7
                (*null_map_data)[i] = 1;
1062
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1063
7
                continue;
1064
7
            }
1065
1066
44
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1067
1068
44
            if (outlen == 0) {
1069
13
                (*null_map_data)[i] = 1;
1070
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1071
13
                continue;
1072
13
            }
1073
1074
31
            offset += outlen;
1075
31
            dst_offsets[i] = cast_set<uint32_t>(offset);
1076
31
        }
1077
33
        dst_data.pop_back(total_size - offset);
1078
33
        return Status::OK();
1079
33
    }
1080
};
1081
1082
struct NameStringSpace {
1083
    static constexpr auto name = "space";
1084
};
1085
1086
struct StringSpace {
1087
    using ReturnType = DataTypeString;
1088
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_INT;
1089
    using Type = Int32;
1090
    using ReturnColumnType = ColumnString;
1091
1092
    static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data,
1093
10
                         ColumnString::Offsets& res_offsets) {
1094
10
        res_offsets.resize(data.size());
1095
10
        size_t input_size = res_offsets.size();
1096
10
        int64_t total_size = 0;
1097
34
        for (size_t i = 0; i < input_size; ++i) {
1098
24
            if (data[i] > 0) {
1099
14
                total_size += data[i];
1100
14
            }
1101
24
        }
1102
10
        ColumnString::check_chars_length(total_size, input_size);
1103
10
        res_data.reserve(total_size);
1104
1105
34
        for (size_t i = 0; i < input_size; ++i) {
1106
24
            if (data[i] > 0) [[likely]] {
1107
14
                res_data.resize_fill(res_data.size() + data[i], ' ');
1108
14
                cast_set(res_offsets[i], res_data.size());
1109
14
            } else {
1110
10
                StringOP::push_empty_string(i, res_data, res_offsets);
1111
10
            }
1112
24
        }
1113
10
        return Status::OK();
1114
10
    }
1115
};
1116
1117
struct ToBase64Impl {
1118
    static constexpr auto name = "to_base64";
1119
    using ReturnType = DataTypeString;
1120
    using ColumnType = ColumnString;
1121
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1122
1123
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1124
107
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1125
107
        auto rows_count = offsets.size();
1126
107
        dst_offsets.resize(rows_count);
1127
1128
107
        size_t total_size = 0;
1129
250
        for (size_t i = 0; i < rows_count; i++) {
1130
143
            size_t len = offsets[i] - offsets[i - 1];
1131
143
            total_size += 4 * ((len + 2) / 3);
1132
143
        }
1133
107
        ColumnString::check_chars_length(total_size, rows_count);
1134
107
        dst_data.resize(total_size);
1135
107
        auto* dst_data_ptr = dst_data.data();
1136
107
        size_t offset = 0;
1137
1138
250
        for (int i = 0; i < rows_count; ++i) {
1139
143
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1140
143
            size_t srclen = offsets[i] - offsets[i - 1];
1141
1142
143
            if (UNLIKELY(srclen == 0)) {
1143
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1144
7
                continue;
1145
7
            }
1146
1147
136
            auto outlen = doris::base64_encode((const unsigned char*)source, srclen,
1148
136
                                               (unsigned char*)(dst_data_ptr + offset));
1149
1150
136
            offset += outlen;
1151
136
            dst_offsets[i] = cast_set<uint32_t>(offset);
1152
136
        }
1153
107
        dst_data.pop_back(total_size - offset);
1154
107
        return Status::OK();
1155
107
    }
1156
};
1157
1158
struct FromBase64Impl {
1159
    static constexpr auto name = "from_base64";
1160
    using ReturnType = DataTypeString;
1161
    using ColumnType = ColumnString;
1162
1163
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1164
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1165
117
                         NullMap& null_map) {
1166
117
        auto rows_count = offsets.size();
1167
117
        dst_offsets.resize(rows_count);
1168
1169
117
        size_t total_size = 0;
1170
287
        for (size_t i = 0; i < rows_count; i++) {
1171
170
            auto len = offsets[i] - offsets[i - 1];
1172
170
            total_size += len / 4 * 3;
1173
170
        }
1174
117
        ColumnString::check_chars_length(total_size, rows_count);
1175
117
        dst_data.resize(total_size);
1176
117
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1177
117
        size_t offset = 0;
1178
1179
287
        for (int i = 0; i < rows_count; ++i) {
1180
170
            if (UNLIKELY(null_map[i])) {
1181
0
                null_map[i] = 1;
1182
0
                dst_offsets[i] = cast_set<uint32_t>(offset);
1183
0
                continue;
1184
0
            }
1185
1186
170
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1187
170
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1188
1189
170
            if (UNLIKELY(srclen == 0)) {
1190
6
                dst_offsets[i] = cast_set<uint32_t>(offset);
1191
6
                continue;
1192
6
            }
1193
1194
164
            auto outlen = base64_decode(source, srclen, dst_data_ptr + offset);
1195
1196
164
            if (outlen < 0) {
1197
60
                null_map[i] = 1;
1198
60
                dst_offsets[i] = cast_set<uint32_t>(offset);
1199
104
            } else {
1200
104
                offset += outlen;
1201
104
                dst_offsets[i] = cast_set<uint32_t>(offset);
1202
104
            }
1203
164
        }
1204
117
        dst_data.pop_back(total_size - offset);
1205
117
        return Status::OK();
1206
117
    }
1207
};
1208
1209
struct StringAppendTrailingCharIfAbsent {
1210
    static constexpr auto name = "append_trailing_char_if_absent";
1211
    using Chars = ColumnString::Chars;
1212
    using Offsets = ColumnString::Offsets;
1213
    using ReturnType = DataTypeString;
1214
    using ColumnType = ColumnString;
1215
1216
48
    static bool str_end_with(const StringRef& str, const StringRef& end) {
1217
48
        if (str.size < end.size) {
1218
11
            return false;
1219
11
        }
1220
        // The end_with method of StringRef needs to ensure that the size of end is less than or equal to the size of str.
1221
37
        return str.end_with(end);
1222
48
    }
1223
1224
    static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1225
                              const Chars& rdata, const Offsets& roffsets, Chars& res_data,
1226
56
                              Offsets& res_offsets, NullMap& null_map_data) {
1227
56
        DCHECK_EQ(loffsets.size(), roffsets.size());
1228
56
        size_t input_rows_count = loffsets.size();
1229
56
        res_offsets.resize(input_rows_count);
1230
56
        fmt::memory_buffer buffer;
1231
1232
158
        for (size_t i = 0; i < input_rows_count; ++i) {
1233
102
            buffer.clear();
1234
1235
102
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1236
102
                                       loffsets[i] - loffsets[i - 1]);
1237
102
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1238
102
                                       roffsets[i] - roffsets[i - 1]);
1239
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1240
102
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1241
102
                    rstr.begin(), rstr.end(), 2);
1242
1243
102
            if (char_len != 1) {
1244
66
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1245
66
                continue;
1246
66
            }
1247
36
            if (str_end_with(lstr, rstr)) {
1248
9
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1249
9
                continue;
1250
9
            }
1251
1252
27
            buffer.append(lstr.begin(), lstr.end());
1253
27
            buffer.append(rstr.begin(), rstr.end());
1254
27
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1255
27
                                        res_offsets);
1256
27
        }
1257
56
    }
1258
    static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1259
                              const StringRef& rstr, Chars& res_data, Offsets& res_offsets,
1260
8
                              NullMap& null_map_data) {
1261
8
        size_t input_rows_count = loffsets.size();
1262
8
        res_offsets.resize(input_rows_count);
1263
8
        fmt::memory_buffer buffer;
1264
        // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1265
8
        auto [byte_len, char_len] =
1266
8
                simd::VStringFunctions::iterate_utf8_with_limit_length(rstr.begin(), rstr.end(), 2);
1267
8
        if (char_len != 1) {
1268
4
            for (size_t i = 0; i < input_rows_count; ++i) {
1269
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1270
2
            }
1271
2
            return;
1272
2
        }
1273
1274
12
        for (size_t i = 0; i < input_rows_count; ++i) {
1275
6
            buffer.clear();
1276
6
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1277
6
                                       loffsets[i] - loffsets[i - 1]);
1278
1279
6
            if (str_end_with(lstr, rstr)) {
1280
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1281
2
                continue;
1282
2
            }
1283
1284
4
            buffer.append(lstr.begin(), lstr.end());
1285
4
            buffer.append(rstr.begin(), rstr.end());
1286
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1287
4
                                        res_offsets);
1288
4
        }
1289
6
    }
1290
    static void scalar_vector(FunctionContext* context, const StringRef& lstr, const Chars& rdata,
1291
                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
1292
8
                              NullMap& null_map_data) {
1293
8
        size_t input_rows_count = roffsets.size();
1294
8
        res_offsets.resize(input_rows_count);
1295
8
        fmt::memory_buffer buffer;
1296
1297
16
        for (size_t i = 0; i < input_rows_count; ++i) {
1298
8
            buffer.clear();
1299
1300
8
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1301
8
                                       roffsets[i] - roffsets[i - 1]);
1302
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1303
8
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1304
8
                    rstr.begin(), rstr.end(), 2);
1305
1306
8
            if (char_len != 1) {
1307
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1308
2
                continue;
1309
2
            }
1310
6
            if (str_end_with(lstr, rstr)) {
1311
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1312
2
                continue;
1313
2
            }
1314
1315
4
            buffer.append(lstr.begin(), lstr.end());
1316
4
            buffer.append(rstr.begin(), rstr.end());
1317
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1318
4
                                        res_offsets);
1319
4
        }
1320
8
    }
1321
};
1322
1323
struct StringLPad {
1324
    static constexpr auto name = "lpad";
1325
    static constexpr auto is_lpad = true;
1326
};
1327
1328
/// Policy tag selecting the right-padding flavour of the pad function.
struct StringRPad {
    /// Function name exposed by this policy.
    static constexpr const char* name = "rpad";
    /// False: this policy identifies the right-pad variant.
    static constexpr bool is_lpad = false;
};
1332
1333
// Two-argument string implementations. Each alias binds the generic StringFunctionImpl to
// one operation type (StartsWithOp, EndsWithOp, FindInSetOp) while leaving the left/right
// data types as template parameters.
template <typename LeftDataType, typename RightDataType>
1334
using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;
1335
1336
template <typename LeftDataType, typename RightDataType>
1337
using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;
1338
1339
template <typename LeftDataType, typename RightDataType>
1340
using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
1341
1342
// Concrete single-argument function types, ready to be registered with the function
// factory. Each pairs an implementation struct with its name struct via FunctionUnaryToType.
1343
using FunctionStringParseDataSize = FunctionUnaryToType<ParseDataSize, NameParseDataSize>;
1344
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
1345
using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLength>;
1346
using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
1347
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
1348
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
1349
using FunctionIsValidUTF8 = FunctionUnaryToType<IsValidUTF8Impl, NameIsValidUTF8>;
1350
1351
namespace {
1352
1353
3.45k
/// Records one "unsupported expression" in the evaluation statistics and reports
/// kUnsupported, so every bail-out path that uses it is counted the same way.
ZoneMapFilterResult unsupported_starts_with_zonemap(const ZoneMapEvalContext& ctx) {
    auto& stats = ctx.stats;
    ++stats.unsupported_expr_count;
    return ZoneMapFilterResult::kUnsupported;
}
1357
1358
56
bool field_less_for_starts_with_zonemap(const Field& lhs, const Field& rhs) {
1359
56
    return (lhs <=> rhs) == std::strong_ordering::less;
1360
56
}
1361
1362
56
/// Wraps `value` in a TYPE_STRING Field. The bytes are copied into an owning std::string
/// first, so the returned Field does not depend on the lifetime of `value`'s buffer.
Field string_field_for_starts_with_zonemap(std::string_view value) {
    std::string owned(value);
    return Field::create_field<TYPE_STRING>(std::move(owned));
}
1365
1366
18
/// Computes the smallest byte string that is strictly greater than every string starting
/// with `prefix`.
///
/// ZoneMap string bounds are compared by bytewise Field ordering. For starts_with(s, p),
/// the safe upper bound is the next byte string after p: p <= s < next_prefix(p).
///
/// The result is `prefix` with its trailing 0xFF bytes dropped and the last remaining byte
/// incremented by one.
///
/// @param prefix  the literal prefix; may be empty and may contain arbitrary bytes.
/// @return the exclusive upper bound, or std::nullopt when no such bound exists (the prefix
///         is empty or consists solely of 0xFF bytes).
std::optional<std::string> next_prefix_for_starts_with_zonemap(std::string_view prefix) {
    // Trailing 0xFF bytes cannot be incremented; locate the last byte that can.
    const auto last_incrementable = prefix.find_last_not_of('\xFF');
    if (last_incrementable == std::string_view::npos) {
        return std::nullopt;
    }
    // Copy only the bytes that survive truncation, then bump the final one.
    std::string upper(prefix.substr(0, last_incrementable + 1));
    upper.back() = static_cast<char>(static_cast<unsigned char>(upper.back()) + 1);
    return upper;
}
1380
1381
3.55k
bool supports_starts_with_zonemap_slot_type(const DataTypePtr& data_type) {
1382
3.55k
    if (data_type == nullptr) {
1383
0
        return false;
1384
0
    }
1385
3.55k
    auto primitive_type = remove_nullable(data_type)->get_primitive_type();
1386
3.65k
    return primitive_type != TYPE_CHAR && is_string_type(primitive_type);
1387
3.55k
}
1388
1389
/// Evaluates `starts_with(<slot>, <literal>)` against the slot's zone map.
///
/// Result:
///  - kNoMatch     when the literal is NULL, the zone holds no non-null value, the zone's
///                 max is below the prefix, or the zone's min is not below the next prefix;
///  - kMayMatch    when the prefix is empty or the zone range overlaps
///                 [prefix, next_prefix);
///  - kUnsupported when the argument shape, the types or the zone-map statistics do not
///                 allow a decision.
ZoneMapFilterResult evaluate_starts_with_zonemap(const ZoneMapEvalContext& ctx,
                                                 const VExprSPtrs& arguments) {
    // Only the (slot, literal) argument order is handled.
    auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments);
    if (!slot_literal.has_value() || slot_literal->literal_on_left) {
        return unsupported_starts_with_zonemap(ctx);
    }

    // A NULL prefix literal is reported as "no row can match".
    if (slot_literal->literal.is_null()) {
        return ZoneMapFilterResult::kNoMatch;
    }

    // Both the slot type and the literal type must be known.
    const auto* slot_type = ctx.data_type(slot_literal->slot_index);
    if (slot_type == nullptr) {
        return unsupported_starts_with_zonemap(ctx);
    }
    if (*slot_type == nullptr) {
        return unsupported_starts_with_zonemap(ctx);
    }
    if (slot_literal->literal_type == nullptr) {
        return unsupported_starts_with_zonemap(ctx);
    }

    // Slot must be a supported string type and the literal must be a string as well.
    const bool types_compatible =
            supports_starts_with_zonemap_slot_type(*slot_type) &&
            is_string_type(remove_nullable(slot_literal->literal_type)->get_primitive_type());
    if (!types_compatible) {
        ++ctx.stats.type_mismatch_count;
        return unsupported_starts_with_zonemap(ctx);
    }

    auto zone_map_ref = expr_zonemap::fetch_zone_map(ctx, slot_literal->slot_index);
    if (zone_map_ref == nullptr) {
        // NOTE(review): unlike the other unsupported exits, this one does not bump
        // unsupported_expr_count - confirm that is intended.
        return ZoneMapFilterResult::kUnsupported;
    }
    const auto& zone_map = *zone_map_ref;

    // A zone without any non-null value cannot contain a matching string.
    if (!zone_map.has_not_null) {
        return ZoneMapFilterResult::kNoMatch;
    }
    if (!expr_zonemap::range_stats_usable_for_zonemap(zone_map, *slot_type)) {
        return unsupported_starts_with_zonemap(ctx);
    }

    const auto prefix = slot_literal->literal.as_string_view();
    if (prefix.empty()) {
        // Every string starts with the empty prefix; min/max cannot exclude anything.
        return ZoneMapFilterResult::kMayMatch;
    }

    // Matching strings s satisfy prefix <= s, so max < prefix rules the zone out.
    auto inclusive_lower = string_field_for_starts_with_zonemap(prefix);
    if (field_less_for_starts_with_zonemap(zone_map.max_value, inclusive_lower)) {
        return ZoneMapFilterResult::kNoMatch;
    }

    // Matching strings s satisfy s < next_prefix, so min >= next_prefix rules the zone out.
    // When no next prefix exists there is no usable upper bound.
    if (auto upper_prefix = next_prefix_for_starts_with_zonemap(prefix);
        upper_prefix.has_value()) {
        auto exclusive_upper = string_field_for_starts_with_zonemap(*upper_prefix);
        if (!field_less_for_starts_with_zonemap(zone_map.min_value, exclusive_upper)) {
            return ZoneMapFilterResult::kNoMatch;
        }
    }
    return ZoneMapFilterResult::kMayMatch;
}
1435
1436
36
bool can_evaluate_starts_with_zonemap(const VExprSPtrs& arguments) {
1437
36
    auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments);
1438
38
    if (!slot_literal.has_value() || slot_literal->literal_on_left) {
1439
0
        return false;
1440
0
    }
1441
36
    return supports_starts_with_zonemap_slot_type(slot_literal->slot_type) &&
1442
37
           slot_literal->literal_type != nullptr &&
1443
37
           is_string_type(remove_nullable(slot_literal->literal_type)->get_primitive_type());
1444
36
}
1445
1446
} // namespace
1447
1448
/// The starts-with function: the generic binary string function instantiated with
/// StringStartsWithImpl / NameStartsWith, extended with zone-map filtering hooks that
/// delegate to the starts_with zone-map helpers.
class FunctionStringStartsWith : public FunctionBinaryToType<DataTypeString, DataTypeString,
                                                             StringStartsWithImpl, NameStartsWith> {
public:
    /// Creates a shared instance of this function.
    static FunctionPtr create() {
        return std::make_shared<FunctionStringStartsWith>();
    }

    /// Reports whether `arguments` have a shape and types that zone-map evaluation handles.
    bool can_evaluate_zonemap_filter(const VExprSPtrs& arguments) const override {
        return can_evaluate_starts_with_zonemap(arguments);
    }

    /// Evaluates the predicate against zone-map statistics for the given arguments.
    ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx,
                                                const VExprSPtrs& arguments) const override {
        return evaluate_starts_with_zonemap(ctx, arguments);
    }
};
1461
1462
// Two-argument (string, string) functions: each pairs an implementation template with its
// name struct through FunctionBinaryToType.
using FunctionStringEndsWith =
1463
        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
1464
using FunctionStringInstr =
1465
        FunctionBinaryToType<DataTypeString, DataTypeString, StringInStrImpl, NameInstr>;
1466
using FunctionStringLocate =
1467
        FunctionBinaryToType<DataTypeString, DataTypeString, StringLocateImpl, NameLocate>;
1468
using FunctionStringFindInSet =
1469
        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;
1470
1471
// String-to-string functions built on FunctionStringToString.
using FunctionQuote = FunctionStringToString<NameQuoteImpl, NameQuote>;
1472
1473
using FunctionToLower = FunctionStringToString<TransferImpl<NameToLower>, NameToLower>;
1474
1475
using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameToUpper>;
1476
1477
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
1478
1479
// Hex / base64 conversions. The two unhex variants differ in their UnHexImpl policy
// (UnHexImplEmpty vs UnHexImplNull) and in the boolean passed to FunctionStringEncode.
// NOTE(review): presumably that boolean selects a nullable result - confirm against
// FunctionStringEncode.
using FunctionUnHex = FunctionStringEncode<UnHexImpl<UnHexImplEmpty>, false>;
1480
using FunctionUnHexNullable = FunctionStringEncode<UnHexImpl<UnHexImplNull>, true>;
1481
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>;
1482
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
1483
1484
using FunctionStringAppendTrailingCharIfAbsent =
1485
        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;
1486
1487
// lpad / rpad share FunctionStringPad and differ only in the policy struct they pass.
using FunctionStringLPad = FunctionStringPad<StringLPad>;
1488
using FunctionStringRPad = FunctionStringPad<StringRPad>;
1489
1490
// Registration entry points for the other groups of string functions. Only declared here;
// they are invoked from register_function_string().
extern void register_function_string_basic(SimpleFunctionFactory& factory);
1491
extern void register_function_string_digest(SimpleFunctionFactory& factory);
1492
extern void register_function_string_mask(SimpleFunctionFactory& factory);
1493
extern void register_function_string_misc(SimpleFunctionFactory& factory);
1494
extern void register_function_string_search(SimpleFunctionFactory& factory);
1495
extern void register_function_string_url(SimpleFunctionFactory& factory);
1496
1497
8
/// Registers every string function with `factory`: first the per-group sub-registrars,
/// then the function types listed here, and finally the alternative names.
/// The statement order is kept as-is on purpose; do not reorder without checking whether
/// the factory is sensitive to registration order.
void register_function_string(SimpleFunctionFactory& factory) {
    // Groups registered by their own entry points.
    register_function_string_basic(factory);
    register_function_string_digest(factory);
    register_function_string_mask(factory);
    register_function_string_misc(factory);
    register_function_string_search(factory);
    register_function_string_url(factory);

    // Single-argument functions and the two-argument predicate/search functions.
    factory.register_function<FunctionStringParseDataSize>();
    factory.register_function<FunctionStringASCII>();
    factory.register_function<FunctionStringLength>();
    factory.register_function<FunctionCrc32>();
    factory.register_function<FunctionStringUTF8Length>();
    factory.register_function<FunctionStringSpace>();
    factory.register_function<FunctionStringStartsWith>();
    factory.register_function<FunctionStringEndsWith>();
    factory.register_function<FunctionStringInstr>();
    factory.register_function<FunctionStringFindInSet>();
    factory.register_function<FunctionStringLocate>();

    // String-to-string transforms and hex decoding.
    factory.register_function<FunctionQuote>();
    factory.register_function<FunctionReverseCommon>();
    factory.register_function<FunctionUnHex>();
    factory.register_function<FunctionUnHexNullable>();
    factory.register_function<FunctionToLower>();
    factory.register_function<FunctionToUpper>();
    factory.register_function<FunctionToInitcap>();

    // Trim family: Trim1Impl / Trim2Impl, for the plain names and for the "In" names.
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrim>>>();
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrim>>>();
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrim>>>();
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrim>>>();
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrim>>>();
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrim>>>();
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrimIn>>>();
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrimIn>>>();
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrimIn>>>();
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrimIn>>>();
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrimIn>>>();
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrimIn>>>();

    // Concatenation, padding and base64.
    factory.register_function<FunctionStringConcat>();
    factory.register_function<FunctionStringElt>();
    factory.register_function<FunctionStringConcatWs>();
    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
    factory.register_function<FunctionStringRepeat>();
    factory.register_function<FunctionStringLPad>();
    factory.register_function<FunctionStringRPad>();
    factory.register_function<FunctionToBase64>();
    factory.register_function<FunctionFromBase64>();

    // Money formatting, one registration per supported numeric implementation.
    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMALV2>>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL32>>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL64>>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL128I>>>();
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL256>>>();

    // Rounded formatting, one registration per supported numeric implementation.
    factory.register_function<FunctionStringFormatRound<FormatRoundDoubleImpl>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundInt64Impl>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundInt128Impl>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMALV2>>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL32>>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL64>>>();
    factory.register_function<
            FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL128I>>>();
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL256>>>();

    // Replacement, overlay and UTF-8 validation.
    factory.register_function<FunctionReplace<ReplaceImpl, true>>();
    factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
    factory.register_function<FunctionOverlay>();
    factory.register_function<FunctionIsValidUTF8>();

    // Alternative names for functions registered above.
    factory.register_alias(FunctionIsValidUTF8::name, "isValidUTF8");
    factory.register_alias(FunctionToLower::name, "lcase");
    factory.register_alias(FunctionToUpper::name, "ucase");
    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
    factory.register_alias(FunctionStringLength::name, "octet_length");
    factory.register_alias(FunctionOverlay::name, "insert");
}
1575
1576
} // namespace doris