Coverage Report

Created: 2026-07-02 17:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <ctype.h>
19
#include <math.h>
20
#include <re2/stringpiece.h>
21
#include <unicode/schriter.h>
22
#include <unicode/uchar.h>
23
#include <unicode/unistr.h>
24
#include <unicode/ustream.h>
25
26
#include <bitset>
27
#include <compare>
28
#include <cstddef>
29
#include <cstdint>
30
#include <limits>
31
#include <optional>
32
#include <string>
33
#include <string_view>
34
35
#include "common/cast_set.h"
36
#include "common/check.h"
37
#include "common/logging.h"
38
#include "common/status.h"
39
#include "core/column/column.h"
40
#include "core/column/column_string.h"
41
#include "core/data_type/data_type_nullable.h"
42
#include "core/pod_array_fwd.h"
43
#include "core/string_ref.h"
44
#include "exprs/expr_zonemap_filter.h"
45
#include "exprs/function/function_reverse.h"
46
#include "exprs/function/function_string_concat.h"
47
#include "exprs/function/function_string_format.h"
48
#include "exprs/function/function_string_replace.h"
49
#include "exprs/function/function_string_to_string.h"
50
#include "exprs/function/function_totype.h"
51
#include "exprs/function/simple_function_factory.h"
52
#include "exprs/function/string_hex_util.h"
53
#include "util/string_search.hpp"
54
#include "util/url_coding.h"
55
#include "util/utf8_check.h"
56
57
namespace doris {
58
struct NameStringASCII {
59
    static constexpr auto name = "ascii";
60
};
61
62
struct StringASCII {
63
    using ReturnType = DataTypeInt32;
64
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
65
    using Type = String;
66
    using ReturnColumnType = ColumnInt32;
67
68
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
69
54
                         PaddedPODArray<Int32>& res) {
70
54
        auto size = offsets.size();
71
54
        res.resize(size);
72
152
        for (int i = 0; i < size; ++i) {
73
98
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
74
98
            res[i] = (offsets[i] == offsets[i - 1]) ? 0 : static_cast<uint8_t>(raw_str[0]);
75
98
        }
76
54
        return Status::OK();
77
54
    }
78
};
79
80
struct NameParseDataSize {
81
    static constexpr auto name = "parse_data_size";
82
};
83
84
static const std::map<std::string_view, Int128> UNITS = {
85
        {"B", static_cast<Int128>(1)},        {"kB", static_cast<Int128>(1) << 10},
86
        {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30},
87
        {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50},
88
        {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70},
89
        {"YB", static_cast<Int128>(1) << 80}};
90
91
struct ParseDataSize {
92
    using ReturnType = DataTypeInt128;
93
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
94
    using Type = String;
95
    using ReturnColumnType = ColumnInt128;
96
97
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
98
50
                         PaddedPODArray<Int128>& res) {
99
50
        auto size = offsets.size();
100
50
        res.resize(size);
101
104
        for (int i = 0; i < size; ++i) {
102
54
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
103
54
            int str_size = offsets[i] - offsets[i - 1];
104
54
            res[i] = parse_data_size(std::string_view(raw_str, str_size));
105
54
        }
106
50
        return Status::OK();
107
50
    }
108
109
54
    static Int128 parse_data_size(const std::string_view& dataSize) {
110
54
        int digit_length = 0;
111
230
        for (char c : dataSize) {
112
230
            if (isdigit(c) || c == '.') {
113
178
                digit_length++;
114
178
            } else {
115
52
                break;
116
52
            }
117
230
        }
118
119
54
        if (digit_length == 0) {
120
4
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
121
4
                                   "Invalid Input argument \"{}\" of function parse_data_size",
122
4
                                   dataSize);
123
4
        }
124
        // 123.45MB--->123.45 : MB
125
50
        double value = 0.0;
126
50
        try {
127
50
            value = std::stod(std::string(dataSize.substr(0, digit_length)));
128
50
        } catch (const std::exception& e) {
129
0
            throw doris::Exception(
130
0
                    ErrorCode::INVALID_ARGUMENT,
131
0
                    "Invalid Input argument \"{}\" of function parse_data_size, error: {}",
132
0
                    dataSize, e.what());
133
0
        }
134
50
        auto unit = dataSize.substr(digit_length);
135
50
        auto it = UNITS.find(unit);
136
50
        if (it != UNITS.end()) {
137
47
            return static_cast<__int128>(static_cast<long double>(it->second) * value);
138
47
        } else {
139
3
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
140
3
                                   "Invalid Input argument \"{}\" of function parse_data_size",
141
3
                                   dataSize);
142
3
        }
143
50
    }
144
};
145
146
struct NameQuote {
147
    static constexpr auto name = "quote";
148
};
149
150
struct NameQuoteImpl {
151
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
152
17
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
153
17
        size_t offset_size = offsets.size();
154
17
        ColumnString::Offset pos = 0;
155
17
        res_offsets.resize(offset_size);
156
17
        res_data.resize(data.size() + offset_size * 2);
157
45
        for (int i = 0; i < offset_size; i++) {
158
28
            const unsigned char* raw_str = &data[offsets[i - 1]];
159
28
            ColumnString::Offset size = offsets[i] - offsets[i - 1];
160
28
            res_data[pos] = '\'';
161
28
            std::memcpy(res_data.data() + pos + 1, raw_str, size);
162
28
            res_data[pos + size + 1] = '\'';
163
28
            pos += size + 2;
164
28
            res_offsets[i] = pos;
165
28
        }
166
17
        return Status::OK();
167
17
    }
168
};
169
170
struct NameStringLength {
171
    static constexpr auto name = "length";
172
};
173
174
struct StringLengthImpl {
175
    using ReturnType = DataTypeInt32;
176
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
177
    using Type = String;
178
    using ReturnColumnType = ColumnInt32;
179
180
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
181
9.59k
                         PaddedPODArray<Int32>& res) {
182
9.59k
        auto size = offsets.size();
183
9.59k
        res.resize(size);
184
8.29M
        for (int i = 0; i < size; ++i) {
185
8.28M
            int str_size = offsets[i] - offsets[i - 1];
186
8.28M
            res[i] = str_size;
187
8.28M
        }
188
9.59k
        return Status::OK();
189
9.59k
    }
190
};
191
192
struct NameCrc32 {
193
    static constexpr auto name = "crc32";
194
};
195
196
struct Crc32Impl {
197
    using ReturnType = DataTypeInt64;
198
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
199
    using Type = String;
200
    using ReturnColumnType = ColumnInt64;
201
202
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
203
3
                         PaddedPODArray<Int64>& res) {
204
3
        auto size = offsets.size();
205
3
        res.resize(size);
206
6
        for (int i = 0; i < size; ++i) {
207
3
            res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1],
208
3
                             offsets[i] - offsets[i - 1]);
209
3
        }
210
3
        return Status::OK();
211
3
    }
212
};
213
214
struct NameStringUtf8Length {
215
    static constexpr auto name = "char_length";
216
};
217
218
struct StringUtf8LengthImpl {
219
    using ReturnType = DataTypeInt32;
220
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
221
    using Type = String;
222
    using ReturnColumnType = ColumnInt32;
223
224
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
225
65
                         PaddedPODArray<Int32>& res) {
226
65
        auto size = offsets.size();
227
65
        res.resize(size);
228
179
        for (int i = 0; i < size; ++i) {
229
114
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
230
114
            int str_size = offsets[i] - offsets[i - 1];
231
114
            res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size);
232
114
        }
233
65
        return Status::OK();
234
65
    }
235
};
236
237
struct NameIsValidUTF8 {
238
    static constexpr auto name = "is_valid_utf8";
239
};
240
241
struct IsValidUTF8Impl {
242
    using ReturnType = DataTypeUInt8;
243
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
244
    using Type = String;
245
    using ReturnColumnType = ColumnUInt8;
246
247
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
248
39
                         PaddedPODArray<UInt8>& res) {
249
39
        auto size = offsets.size();
250
39
        res.resize(size);
251
98
        for (size_t i = 0; i < size; ++i) {
252
59
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
253
59
            size_t str_size = offsets[i] - offsets[i - 1];
254
59
            res[i] = validate_utf8(raw_str, str_size) ? 1 : 0;
255
59
        }
256
39
        return Status::OK();
257
39
    }
258
};
259
260
struct NameStartsWith {
261
    static constexpr auto name = "starts_with";
262
};
263
264
struct StartsWithOp {
265
    using ResultDataType = DataTypeUInt8;
266
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
267
268
11.4k
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
269
11.4k
        res = strl.starts_with(strr);
270
11.4k
    }
271
};
272
273
struct NameEndsWith {
274
    static constexpr auto name = "ends_with";
275
};
276
277
struct EndsWithOp {
278
    using ResultDataType = DataTypeUInt8;
279
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
280
281
142
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
282
142
        res = strl.ends_with(strr);
283
142
    }
284
};
285
286
struct NameFindInSet {
287
    static constexpr auto name = "find_in_set";
288
};
289
290
struct FindInSetOp {
291
    using ResultDataType = DataTypeInt32;
292
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
293
171
    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
294
670
        for (const auto& c : strl) {
295
670
            if (c == ',') {
296
21
                res = 0;
297
21
                return;
298
21
            }
299
670
        }
300
301
150
        int32_t token_index = 1;
302
150
        int32_t start = 0;
303
150
        int32_t end;
304
305
254
        do {
306
254
            end = start;
307
            // Position end.
308
1.06k
            while (end < strr.length() && strr[end] != ',') {
309
807
                ++end;
310
807
            }
311
312
254
            if (strl == std::string_view {strr.data() + start, (size_t)end - start}) {
313
93
                res = token_index;
314
93
                return;
315
93
            }
316
317
            // Re-position start and end past ','
318
161
            start = end + 1;
319
161
            ++token_index;
320
161
        } while (start < strr.length());
321
57
        res = 0;
322
57
    }
323
};
324
325
struct NameInstr {
326
    static constexpr auto name = "instr";
327
};
328
329
// LeftDataType and RightDataType are DataTypeString
330
template <typename LeftDataType, typename RightDataType>
331
struct StringInStrImpl {
332
    using ResultDataType = DataTypeInt32;
333
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
334
335
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
336
72
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
337
72
        StringRef lstr_ref(ldata.data, ldata.size);
338
339
72
        auto size = roffsets.size();
340
72
        res.resize(size);
341
144
        for (int i = 0; i < size; ++i) {
342
72
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
343
72
            int r_str_size = roffsets[i] - roffsets[i - 1];
344
345
72
            StringRef rstr_ref(r_raw_str, r_str_size);
346
347
72
            res[i] = execute(lstr_ref, rstr_ref);
348
72
        }
349
350
72
        return Status::OK();
351
72
    }
352
353
    static Status vector_scalar(const ColumnString::Chars& ldata,
354
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
355
86
                                ResultPaddedPODArray& res) {
356
86
        auto size = loffsets.size();
357
86
        res.resize(size);
358
359
86
        if (rdata.size == 0) {
360
12
            std::fill(res.begin(), res.end(), 1);
361
12
            return Status::OK();
362
12
        }
363
364
74
        const UInt8* begin = ldata.data();
365
74
        const UInt8* end = begin + ldata.size();
366
74
        const UInt8* pos = begin;
367
368
        /// Current index in the array of strings.
369
74
        size_t i = 0;
370
74
        std::fill(res.begin(), res.end(), 0);
371
372
74
        StringRef rstr_ref(rdata.data, rdata.size);
373
74
        StringSearch search(&rstr_ref);
374
375
90
        while (pos < end) {
376
            // search return matched substring start offset
377
64
            pos = (UInt8*)search.search((char*)pos, end - pos);
378
64
            if (pos >= end) {
379
48
                break;
380
48
            }
381
382
            /// Determine which index it refers to.
383
            /// begin + value_offsets[i] is the start offset of string at i+1
384
16
            while (begin + loffsets[i] < pos) {
385
0
                ++i;
386
0
            }
387
388
            /// We check that the entry does not pass through the boundaries of strings.
389
16
            if (pos + rdata.size <= begin + loffsets[i]) {
390
16
                int loc = (int)(pos - begin) - loffsets[i - 1];
391
16
                int l_str_size = loffsets[i] - loffsets[i - 1];
392
16
                auto len = std::min(l_str_size, loc);
393
16
                loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len);
394
16
                res[i] = loc + 1;
395
16
            }
396
397
            // move to next string offset
398
16
            pos = begin + loffsets[i];
399
16
            ++i;
400
16
        }
401
402
74
        return Status::OK();
403
86
    }
404
405
    static Status vector_vector(const ColumnString::Chars& ldata,
406
                                const ColumnString::Offsets& loffsets,
407
                                const ColumnString::Chars& rdata,
408
240
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
409
240
        DCHECK_EQ(loffsets.size(), roffsets.size());
410
411
240
        auto size = loffsets.size();
412
240
        res.resize(size);
413
727
        for (int i = 0; i < size; ++i) {
414
487
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
415
487
            int l_str_size = loffsets[i] - loffsets[i - 1];
416
487
            StringRef lstr_ref(l_raw_str, l_str_size);
417
418
487
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
419
487
            int r_str_size = roffsets[i] - roffsets[i - 1];
420
487
            StringRef rstr_ref(r_raw_str, r_str_size);
421
422
487
            res[i] = execute(lstr_ref, rstr_ref);
423
487
        }
424
425
240
        return Status::OK();
426
240
    }
427
428
559
    static int execute(const StringRef& strl, const StringRef& strr) {
429
559
        if (strr.size == 0) {
430
74
            return 1;
431
74
        }
432
433
485
        StringSearch search(&strr);
434
        // Hive returns positions starting from 1.
435
485
        int loc = search.search(&strl);
436
485
        if (loc > 0) {
437
59
            int len = std::min(loc, (int)strl.size);
438
59
            loc = simd::VStringFunctions::get_char_len(strl.data, len);
439
59
        }
440
441
485
        return loc + 1;
442
559
    }
443
};
444
445
// the same impl as instr
446
struct NameLocate {
447
    static constexpr auto name = "locate";
448
};
449
450
// LeftDataType and RightDataType are DataTypeString
451
template <typename LeftDataType, typename RightDataType>
452
struct StringLocateImpl {
453
    using ResultDataType = DataTypeInt32;
454
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
455
456
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
457
38
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
458
38
        return StringInStrImpl<LeftDataType, RightDataType>::vector_scalar(rdata, roffsets, ldata,
459
38
                                                                           res);
460
38
    }
461
462
    static Status vector_scalar(const ColumnString::Chars& ldata,
463
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
464
36
                                ResultPaddedPODArray& res) {
465
36
        return StringInStrImpl<LeftDataType, RightDataType>::scalar_vector(rdata, ldata, loffsets,
466
36
                                                                           res);
467
36
    }
468
469
    static Status vector_vector(const ColumnString::Chars& ldata,
470
                                const ColumnString::Offsets& loffsets,
471
                                const ColumnString::Chars& rdata,
472
140
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
473
140
        return StringInStrImpl<LeftDataType, RightDataType>::vector_vector(rdata, roffsets, ldata,
474
140
                                                                           loffsets, res);
475
140
    }
476
};
477
478
// LeftDataType and RightDataType are DataTypeString
479
template <typename LeftDataType, typename RightDataType, typename OP>
480
struct StringFunctionImpl {
481
    using ResultDataType = typename OP::ResultDataType;
482
    using ResultPaddedPODArray = typename OP::ResultPaddedPODArray;
483
484
    static Status vector_vector(const ColumnString::Chars& ldata,
485
                                const ColumnString::Offsets& loffsets,
486
                                const ColumnString::Chars& rdata,
487
214
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
488
214
        DCHECK_EQ(loffsets.size(), roffsets.size());
489
490
214
        auto size = loffsets.size();
491
214
        res.resize(size);
492
578
        for (int i = 0; i < size; ++i) {
493
364
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
494
364
            int l_str_size = loffsets[i] - loffsets[i - 1];
495
496
364
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
497
364
            int r_str_size = roffsets[i] - roffsets[i - 1];
498
499
364
            std::string_view lview(l_raw_str, l_str_size);
500
364
            std::string_view rview(r_raw_str, r_str_size);
501
502
364
            OP::execute(lview, rview, res[i]);
503
364
        }
504
214
        return Status::OK();
505
214
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
487
88
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
488
88
        DCHECK_EQ(loffsets.size(), roffsets.size());
489
490
88
        auto size = loffsets.size();
491
88
        res.resize(size);
492
215
        for (int i = 0; i < size; ++i) {
493
127
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
494
127
            int l_str_size = loffsets[i] - loffsets[i - 1];
495
496
127
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
497
127
            int r_str_size = roffsets[i] - roffsets[i - 1];
498
499
127
            std::string_view lview(l_raw_str, l_str_size);
500
127
            std::string_view rview(r_raw_str, r_str_size);
501
502
127
            OP::execute(lview, rview, res[i]);
503
127
        }
504
88
        return Status::OK();
505
88
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
487
61
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
488
61
        DCHECK_EQ(loffsets.size(), roffsets.size());
489
490
61
        auto size = loffsets.size();
491
61
        res.resize(size);
492
175
        for (int i = 0; i < size; ++i) {
493
114
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
494
114
            int l_str_size = loffsets[i] - loffsets[i - 1];
495
496
114
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
497
114
            int r_str_size = roffsets[i] - roffsets[i - 1];
498
499
114
            std::string_view lview(l_raw_str, l_str_size);
500
114
            std::string_view rview(r_raw_str, r_str_size);
501
502
114
            OP::execute(lview, rview, res[i]);
503
114
        }
504
61
        return Status::OK();
505
61
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
487
65
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
488
65
        DCHECK_EQ(loffsets.size(), roffsets.size());
489
490
65
        auto size = loffsets.size();
491
65
        res.resize(size);
492
188
        for (int i = 0; i < size; ++i) {
493
123
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
494
123
            int l_str_size = loffsets[i] - loffsets[i - 1];
495
496
123
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
497
123
            int r_str_size = roffsets[i] - roffsets[i - 1];
498
499
123
            std::string_view lview(l_raw_str, l_str_size);
500
123
            std::string_view rview(r_raw_str, r_str_size);
501
502
123
            OP::execute(lview, rview, res[i]);
503
123
        }
504
65
        return Status::OK();
505
65
    }
506
    static Status vector_scalar(const ColumnString::Chars& ldata,
507
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
508
37
                                ResultPaddedPODArray& res) {
509
37
        auto size = loffsets.size();
510
37
        res.resize(size);
511
37
        std::string_view rview(rdata.data, rdata.size);
512
11.3k
        for (int i = 0; i < size; ++i) {
513
11.3k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
514
11.3k
            int l_str_size = loffsets[i] - loffsets[i - 1];
515
11.3k
            std::string_view lview(l_raw_str, l_str_size);
516
517
11.3k
            OP::execute(lview, rview, res[i]);
518
11.3k
        }
519
37
        return Status::OK();
520
37
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
508
7
                                ResultPaddedPODArray& res) {
509
7
        auto size = loffsets.size();
510
7
        res.resize(size);
511
7
        std::string_view rview(rdata.data, rdata.size);
512
11.3k
        for (int i = 0; i < size; ++i) {
513
11.3k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
514
11.3k
            int l_str_size = loffsets[i] - loffsets[i - 1];
515
11.3k
            std::string_view lview(l_raw_str, l_str_size);
516
517
11.3k
            OP::execute(lview, rview, res[i]);
518
11.3k
        }
519
7
        return Status::OK();
520
7
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
508
14
                                ResultPaddedPODArray& res) {
509
14
        auto size = loffsets.size();
510
14
        res.resize(size);
511
14
        std::string_view rview(rdata.data, rdata.size);
512
28
        for (int i = 0; i < size; ++i) {
513
14
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
514
14
            int l_str_size = loffsets[i] - loffsets[i - 1];
515
14
            std::string_view lview(l_raw_str, l_str_size);
516
517
14
            OP::execute(lview, rview, res[i]);
518
14
        }
519
14
        return Status::OK();
520
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
508
16
                                ResultPaddedPODArray& res) {
509
16
        auto size = loffsets.size();
510
16
        res.resize(size);
511
16
        std::string_view rview(rdata.data, rdata.size);
512
32
        for (int i = 0; i < size; ++i) {
513
16
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
514
16
            int l_str_size = loffsets[i] - loffsets[i - 1];
515
16
            std::string_view lview(l_raw_str, l_str_size);
516
517
16
            OP::execute(lview, rview, res[i]);
518
16
        }
519
16
        return Status::OK();
520
16
    }
521
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
522
44
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
523
44
        auto size = roffsets.size();
524
44
        res.resize(size);
525
44
        std::string_view lview(ldata.data, ldata.size);
526
94
        for (int i = 0; i < size; ++i) {
527
50
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
528
50
            int r_str_size = roffsets[i] - roffsets[i - 1];
529
50
            std::string_view rview(r_raw_str, r_str_size);
530
531
50
            OP::execute(lview, rview, res[i]);
532
50
        }
533
44
        return Status::OK();
534
44
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
522
4
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
523
4
        auto size = roffsets.size();
524
4
        res.resize(size);
525
4
        std::string_view lview(ldata.data, ldata.size);
526
8
        for (int i = 0; i < size; ++i) {
527
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
528
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
529
4
            std::string_view rview(r_raw_str, r_str_size);
530
531
4
            OP::execute(lview, rview, res[i]);
532
4
        }
533
4
        return Status::OK();
534
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
522
14
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
523
14
        auto size = roffsets.size();
524
14
        res.resize(size);
525
14
        std::string_view lview(ldata.data, ldata.size);
526
28
        for (int i = 0; i < size; ++i) {
527
14
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
528
14
            int r_str_size = roffsets[i] - roffsets[i - 1];
529
14
            std::string_view rview(r_raw_str, r_str_size);
530
531
14
            OP::execute(lview, rview, res[i]);
532
14
        }
533
14
        return Status::OK();
534
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERNS7_IiLm4096ESA_Lm16ELm15EEE
Line
Count
Source
522
26
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
523
26
        auto size = roffsets.size();
524
26
        res.resize(size);
525
26
        std::string_view lview(ldata.data, ldata.size);
526
58
        for (int i = 0; i < size; ++i) {
527
32
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
528
32
            int r_str_size = roffsets[i] - roffsets[i - 1];
529
32
            std::string_view rview(r_raw_str, r_str_size);
530
531
32
            OP::execute(lview, rview, res[i]);
532
32
        }
533
26
        return Status::OK();
534
26
    }
535
};
536
537
struct NameToLower {
538
    static constexpr auto name = "lower";
539
};
540
541
struct NameToUpper {
542
    static constexpr auto name = "upper";
543
};
544
545
template <typename OpName>
546
struct TransferImpl {
547
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
548
331
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
549
331
        size_t offset_size = offsets.size();
550
331
        if (UNLIKELY(!offset_size)) {
551
0
            return Status::OK();
552
0
        }
553
554
331
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
555
331
        res_offsets.resize(offset_size);
556
331
        if (is_ascii) {
557
269
            memcpy_small_allow_read_write_overflow15(
558
269
                    res_offsets.data(), offsets.data(),
559
269
                    offset_size * sizeof(ColumnString::Offsets::value_type));
560
561
269
            size_t data_length = data.size();
562
269
            res_data.resize(data_length);
563
269
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
564
96
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
565
173
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
566
173
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
567
173
            }
568
269
        } else {
569
62
            execute_utf8(data, offsets, res_data, res_offsets);
570
62
        }
571
572
331
        return Status::OK();
573
331
    }
_ZN5doris12TransferImplINS_11NameToLowerEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
548
195
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
549
195
        size_t offset_size = offsets.size();
550
195
        if (UNLIKELY(!offset_size)) {
551
0
            return Status::OK();
552
0
        }
553
554
195
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
555
195
        res_offsets.resize(offset_size);
556
195
        if (is_ascii) {
557
173
            memcpy_small_allow_read_write_overflow15(
558
173
                    res_offsets.data(), offsets.data(),
559
173
                    offset_size * sizeof(ColumnString::Offsets::value_type));
560
561
173
            size_t data_length = data.size();
562
173
            res_data.resize(data_length);
563
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
564
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
565
173
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
566
173
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
567
173
            }
568
173
        } else {
569
22
            execute_utf8(data, offsets, res_data, res_offsets);
570
22
        }
571
572
195
        return Status::OK();
573
195
    }
_ZN5doris12TransferImplINS_11NameToUpperEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
548
136
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
549
136
        size_t offset_size = offsets.size();
550
136
        if (UNLIKELY(!offset_size)) {
551
0
            return Status::OK();
552
0
        }
553
554
136
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
555
136
        res_offsets.resize(offset_size);
556
136
        if (is_ascii) {
557
96
            memcpy_small_allow_read_write_overflow15(
558
96
                    res_offsets.data(), offsets.data(),
559
96
                    offset_size * sizeof(ColumnString::Offsets::value_type));
560
561
96
            size_t data_length = data.size();
562
96
            res_data.resize(data_length);
563
96
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
564
96
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
565
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
566
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
567
            }
568
96
        } else {
569
40
            execute_utf8(data, offsets, res_data, res_offsets);
570
40
        }
571
572
136
        return Status::OK();
573
136
    }
574
575
    static void execute_utf8(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
576
62
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
577
62
        std::string result;
578
202
        for (int64_t i = 0; i < offsets.size(); ++i) {
579
140
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
580
140
            uint32_t size = offsets[i] - offsets[i - 1];
581
582
140
            result.clear();
583
140
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
584
92
                to_upper_utf8(begin, size, result);
585
92
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
586
48
                to_lower_utf8(begin, size, result);
587
48
            }
588
140
            StringOP::push_value_string(result, i, res_data, res_offsets);
589
140
        }
590
62
    }
_ZN5doris12TransferImplINS_11NameToLowerEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
576
22
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
577
22
        std::string result;
578
70
        for (int64_t i = 0; i < offsets.size(); ++i) {
579
48
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
580
48
            uint32_t size = offsets[i] - offsets[i - 1];
581
582
48
            result.clear();
583
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
584
                to_upper_utf8(begin, size, result);
585
48
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
586
48
                to_lower_utf8(begin, size, result);
587
48
            }
588
48
            StringOP::push_value_string(result, i, res_data, res_offsets);
589
48
        }
590
22
    }
_ZN5doris12TransferImplINS_11NameToUpperEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
576
40
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
577
40
        std::string result;
578
132
        for (int64_t i = 0; i < offsets.size(); ++i) {
579
92
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
580
92
            uint32_t size = offsets[i] - offsets[i - 1];
581
582
92
            result.clear();
583
92
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
584
92
                to_upper_utf8(begin, size, result);
585
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
586
                to_lower_utf8(begin, size, result);
587
            }
588
92
            StringOP::push_value_string(result, i, res_data, res_offsets);
589
92
        }
590
40
    }
591
592
92
    static void to_upper_utf8(const char* data, uint32_t size, std::string& result) {
593
92
        icu::StringPiece sp;
594
92
        sp.set(data, size);
595
92
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
596
92
        unicode_str.toUpper();
597
92
        unicode_str.toUTF8String(result);
598
92
    }
599
600
48
    static void to_lower_utf8(const char* data, uint32_t size, std::string& result) {
601
48
        icu::StringPiece sp;
602
48
        sp.set(data, size);
603
48
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
604
48
        unicode_str.toLower();
605
48
        unicode_str.toUTF8String(result);
606
48
    }
607
};
608
609
// Capitalize first letter
610
struct NameToInitcap {
611
    static constexpr auto name = "initcap";
612
};
613
614
struct InitcapImpl {
615
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
616
173
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
617
173
        res_offsets.resize(offsets.size());
618
619
173
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
620
173
        if (is_ascii) {
621
114
            impl_vectors_ascii(data, offsets, res_data, res_offsets);
622
114
        } else {
623
59
            impl_vectors_utf8(data, offsets, res_data, res_offsets);
624
59
        }
625
173
        return Status::OK();
626
173
    }
627
628
    static void impl_vectors_ascii(const ColumnString::Chars& data,
629
                                   const ColumnString::Offsets& offsets,
630
                                   ColumnString::Chars& res_data,
631
114
                                   ColumnString::Offsets& res_offsets) {
632
114
        size_t offset_size = offsets.size();
633
114
        memcpy_small_allow_read_write_overflow15(
634
114
                res_offsets.data(), offsets.data(),
635
114
                offset_size * sizeof(ColumnString::Offsets::value_type));
636
637
114
        size_t data_length = data.size();
638
114
        res_data.resize(data_length);
639
114
        simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
640
641
114
        bool need_capitalize = true;
642
246
        for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) {
643
132
            auto end_index = res_offsets[offset_index];
644
132
            need_capitalize = true;
645
646
1.56k
            for (size_t i = start_index; i < end_index; ++i) {
647
1.43k
                if (!::isalnum(res_data[i])) {
648
216
                    need_capitalize = true;
649
1.21k
                } else if (need_capitalize) {
650
                    /*
651
                    https://en.cppreference.com/w/cpp/string/byte/toupper
652
                    Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. 
653
                    To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char:
654
                    char my_toupper(char ch)
655
                    {
656
                        return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
657
                    }
658
                    */
659
267
                    res_data[i] = static_cast<unsigned char>(::toupper(res_data[i]));
660
267
                    need_capitalize = false;
661
267
                }
662
1.43k
            }
663
664
132
            start_index = end_index;
665
132
        }
666
114
    }
667
668
    static void impl_vectors_utf8(const ColumnString::Chars& data,
669
                                  const ColumnString::Offsets& offsets,
670
                                  ColumnString::Chars& res_data,
671
59
                                  ColumnString::Offsets& res_offsets) {
672
59
        std::string result;
673
125
        for (int64_t i = 0; i < offsets.size(); ++i) {
674
66
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
675
66
            uint32_t size = offsets[i] - offsets[i - 1];
676
66
            result.clear();
677
66
            to_initcap_utf8(begin, size, result);
678
66
            StringOP::push_value_string(result, i, res_data, res_offsets);
679
66
        }
680
59
    }
681
682
66
    static void to_initcap_utf8(const char* data, uint32_t size, std::string& result) {
683
66
        icu::StringPiece sp;
684
66
        sp.set(data, size);
685
66
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
686
66
        unicode_str.toLower();
687
66
        icu::UnicodeString output_str;
688
66
        bool need_capitalize = true;
689
66
        icu::StringCharacterIterator iter(unicode_str);
690
662
        for (UChar32 ch = iter.first32(); ch != icu::CharacterIterator::DONE; ch = iter.next32()) {
691
596
            if (!u_isalnum(ch)) {
692
107
                need_capitalize = true;
693
489
            } else if (need_capitalize) {
694
90
                ch = u_toupper(ch);
695
90
                need_capitalize = false;
696
90
            }
697
596
            output_str.append(ch);
698
596
        }
699
66
        output_str.toUTF8String(result);
700
66
    }
701
};
702
703
struct NameTrim {
704
    static constexpr auto name = "trim";
705
};
706
struct NameLTrim {
707
    static constexpr auto name = "ltrim";
708
};
709
struct NameRTrim {
710
    static constexpr auto name = "rtrim";
711
};
712
struct NameTrimIn {
713
    static constexpr auto name = "trim_in";
714
};
715
struct NameLTrimIn {
716
    static constexpr auto name = "ltrim_in";
717
};
718
struct NameRTrimIn {
719
    static constexpr auto name = "rtrim_in";
720
};
721
template <bool is_ltrim, bool is_rtrim, bool trim_single>
722
struct TrimUtil {
723
    static Status vector(const ColumnString::Chars& str_data,
724
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
725
300
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
300
        const size_t offset_size = str_offsets.size();
727
300
        res_offsets.resize(offset_size);
728
300
        res_data.reserve(str_data.size());
729
852
        for (size_t i = 0; i < offset_size; ++i) {
730
552
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
552
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
552
            if constexpr (is_ltrim) {
734
335
                str_begin =
735
335
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
335
            }
737
552
            if constexpr (is_rtrim) {
738
395
                str_end =
739
395
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
395
            }
741
742
552
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
552
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
552
        }
746
300
        return Status::OK();
747
300
    }
_ZN5doris8TrimUtilILb1ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
58
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
58
        const size_t offset_size = str_offsets.size();
727
58
        res_offsets.resize(offset_size);
728
58
        res_data.reserve(str_data.size());
729
178
        for (size_t i = 0; i < offset_size; ++i) {
730
120
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
120
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
120
            if constexpr (is_ltrim) {
734
120
                str_begin =
735
120
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
120
            }
737
120
            if constexpr (is_rtrim) {
738
120
                str_end =
739
120
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
120
            }
741
742
120
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
120
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
120
        }
746
58
        return Status::OK();
747
58
    }
_ZN5doris8TrimUtilILb1ELb0ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
52
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
52
        const size_t offset_size = str_offsets.size();
727
52
        res_offsets.resize(offset_size);
728
52
        res_data.reserve(str_data.size());
729
148
        for (size_t i = 0; i < offset_size; ++i) {
730
96
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
96
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
96
            if constexpr (is_ltrim) {
734
96
                str_begin =
735
96
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
96
            }
737
            if constexpr (is_rtrim) {
738
                str_end =
739
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
            }
741
742
96
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
96
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
96
        }
746
52
        return Status::OK();
747
52
    }
_ZN5doris8TrimUtilILb0ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
94
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
94
        const size_t offset_size = str_offsets.size();
727
94
        res_offsets.resize(offset_size);
728
94
        res_data.reserve(str_data.size());
729
266
        for (size_t i = 0; i < offset_size; ++i) {
730
172
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
172
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
            if constexpr (is_ltrim) {
734
                str_begin =
735
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
            }
737
172
            if constexpr (is_rtrim) {
738
172
                str_end =
739
172
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
172
            }
741
742
172
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
172
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
172
        }
746
94
        return Status::OK();
747
94
    }
_ZN5doris8TrimUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
24
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
24
        const size_t offset_size = str_offsets.size();
727
24
        res_offsets.resize(offset_size);
728
24
        res_data.reserve(str_data.size());
729
82
        for (size_t i = 0; i < offset_size; ++i) {
730
58
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
58
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
58
            if constexpr (is_ltrim) {
734
58
                str_begin =
735
58
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
58
            }
737
58
            if constexpr (is_rtrim) {
738
58
                str_end =
739
58
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
58
            }
741
742
58
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
58
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
58
        }
746
24
        return Status::OK();
747
24
    }
_ZN5doris8TrimUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
27
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
27
        const size_t offset_size = str_offsets.size();
727
27
        res_offsets.resize(offset_size);
728
27
        res_data.reserve(str_data.size());
729
88
        for (size_t i = 0; i < offset_size; ++i) {
730
61
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
61
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
61
            if constexpr (is_ltrim) {
734
61
                str_begin =
735
61
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
61
            }
737
            if constexpr (is_rtrim) {
738
                str_end =
739
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
            }
741
742
61
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
61
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
61
        }
746
27
        return Status::OK();
747
27
    }
_ZN5doris8TrimUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
45
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
45
        const size_t offset_size = str_offsets.size();
727
45
        res_offsets.resize(offset_size);
728
45
        res_data.reserve(str_data.size());
729
90
        for (size_t i = 0; i < offset_size; ++i) {
730
45
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
45
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
            if constexpr (is_ltrim) {
734
                str_begin =
735
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
            }
737
45
            if constexpr (is_rtrim) {
738
45
                str_end =
739
45
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
45
            }
741
742
45
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
45
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
45
        }
746
45
        return Status::OK();
747
45
    }
748
};
749
template <bool is_ltrim, bool is_rtrim, bool trim_single>
750
struct TrimInUtil {
751
    static Status vector(const ColumnString::Chars& str_data,
752
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
753
121
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
754
121
        const size_t offset_size = str_offsets.size();
755
121
        res_offsets.resize(offset_size);
756
121
        res_data.reserve(str_data.size());
757
121
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
758
121
                         simd::VStringFunctions::is_ascii(StringRef(
759
76
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
760
761
121
        if (all_ascii) {
762
68
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
763
68
        } else {
764
53
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
765
53
        }
766
121
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
753
43
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
754
43
        const size_t offset_size = str_offsets.size();
755
43
        res_offsets.resize(offset_size);
756
43
        res_data.reserve(str_data.size());
757
43
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
758
43
                         simd::VStringFunctions::is_ascii(StringRef(
759
28
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
760
761
43
        if (all_ascii) {
762
24
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
763
24
        } else {
764
19
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
765
19
        }
766
43
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
753
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
754
36
        const size_t offset_size = str_offsets.size();
755
36
        res_offsets.resize(offset_size);
756
36
        res_data.reserve(str_data.size());
757
36
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
758
36
                         simd::VStringFunctions::is_ascii(StringRef(
759
21
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
760
761
36
        if (all_ascii) {
762
19
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
763
19
        } else {
764
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
765
17
        }
766
36
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
753
42
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
754
42
        const size_t offset_size = str_offsets.size();
755
42
        res_offsets.resize(offset_size);
756
42
        res_data.reserve(str_data.size());
757
42
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
758
42
                         simd::VStringFunctions::is_ascii(StringRef(
759
27
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
760
761
42
        if (all_ascii) {
762
25
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
763
25
        } else {
764
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
765
17
        }
766
42
    }
767
768
private:
769
    static Status impl_vectors_ascii(const ColumnString::Chars& str_data,
770
                                     const ColumnString::Offsets& str_offsets,
771
                                     const StringRef& remove_str, ColumnString::Chars& res_data,
772
68
                                     ColumnString::Offsets& res_offsets) {
773
68
        const size_t offset_size = str_offsets.size();
774
68
        std::bitset<128> char_lookup;
775
68
        const char* remove_begin = remove_str.data;
776
68
        const char* remove_end = remove_str.data + remove_str.size;
777
778
251
        while (remove_begin < remove_end) {
779
183
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
780
183
            remove_begin += 1;
781
183
        }
782
783
136
        for (size_t i = 0; i < offset_size; ++i) {
784
68
            const char* str_begin =
785
68
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
786
68
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
787
68
            const char* left_trim_pos = str_begin;
788
68
            const char* right_trim_pos = str_end;
789
790
68
            if constexpr (is_ltrim) {
791
127
                while (left_trim_pos < str_end) {
792
114
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
793
30
                        break;
794
30
                    }
795
84
                    ++left_trim_pos;
796
84
                }
797
43
            }
798
799
68
            if constexpr (is_rtrim) {
800
114
                while (right_trim_pos > left_trim_pos) {
801
100
                    --right_trim_pos;
802
100
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
803
35
                        ++right_trim_pos;
804
35
                        break;
805
35
                    }
806
100
                }
807
49
            }
808
809
68
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
810
            // The length of the result of the trim function will never exceed the length of the input.
811
68
            res_offsets[i] = (ColumnString::Offset)res_data.size();
812
68
        }
813
814
68
        return Status::OK();
815
68
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
772
24
                                     ColumnString::Offsets& res_offsets) {
773
24
        const size_t offset_size = str_offsets.size();
774
24
        std::bitset<128> char_lookup;
775
24
        const char* remove_begin = remove_str.data;
776
24
        const char* remove_end = remove_str.data + remove_str.size;
777
778
86
        while (remove_begin < remove_end) {
779
62
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
780
62
            remove_begin += 1;
781
62
        }
782
783
48
        for (size_t i = 0; i < offset_size; ++i) {
784
24
            const char* str_begin =
785
24
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
786
24
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
787
24
            const char* left_trim_pos = str_begin;
788
24
            const char* right_trim_pos = str_end;
789
790
24
            if constexpr (is_ltrim) {
791
57
                while (left_trim_pos < str_end) {
792
50
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
793
17
                        break;
794
17
                    }
795
33
                    ++left_trim_pos;
796
33
                }
797
24
            }
798
799
24
            if constexpr (is_rtrim) {
800
39
                while (right_trim_pos > left_trim_pos) {
801
32
                    --right_trim_pos;
802
32
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
803
17
                        ++right_trim_pos;
804
17
                        break;
805
17
                    }
806
32
                }
807
24
            }
808
809
24
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
810
            // The length of the result of the trim function will never exceed the length of the input.
811
24
            res_offsets[i] = (ColumnString::Offset)res_data.size();
812
24
        }
813
814
24
        return Status::OK();
815
24
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
772
19
                                     ColumnString::Offsets& res_offsets) {
773
19
        const size_t offset_size = str_offsets.size();
774
19
        std::bitset<128> char_lookup;
775
19
        const char* remove_begin = remove_str.data;
776
19
        const char* remove_end = remove_str.data + remove_str.size;
777
778
73
        while (remove_begin < remove_end) {
779
54
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
780
54
            remove_begin += 1;
781
54
        }
782
783
38
        for (size_t i = 0; i < offset_size; ++i) {
784
19
            const char* str_begin =
785
19
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
786
19
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
787
19
            const char* left_trim_pos = str_begin;
788
19
            const char* right_trim_pos = str_end;
789
790
19
            if constexpr (is_ltrim) {
791
70
                while (left_trim_pos < str_end) {
792
64
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
793
13
                        break;
794
13
                    }
795
51
                    ++left_trim_pos;
796
51
                }
797
19
            }
798
799
            if constexpr (is_rtrim) {
800
                while (right_trim_pos > left_trim_pos) {
801
                    --right_trim_pos;
802
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
803
                        ++right_trim_pos;
804
                        break;
805
                    }
806
                }
807
            }
808
809
19
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
810
            // The length of the result of the trim function will never exceed the length of the input.
811
19
            res_offsets[i] = (ColumnString::Offset)res_data.size();
812
19
        }
813
814
19
        return Status::OK();
815
19
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
772
25
                                     ColumnString::Offsets& res_offsets) {
773
25
        const size_t offset_size = str_offsets.size();
774
25
        std::bitset<128> char_lookup;
775
25
        const char* remove_begin = remove_str.data;
776
25
        const char* remove_end = remove_str.data + remove_str.size;
777
778
92
        while (remove_begin < remove_end) {
779
67
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
780
67
            remove_begin += 1;
781
67
        }
782
783
50
        for (size_t i = 0; i < offset_size; ++i) {
784
25
            const char* str_begin =
785
25
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
786
25
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
787
25
            const char* left_trim_pos = str_begin;
788
25
            const char* right_trim_pos = str_end;
789
790
            if constexpr (is_ltrim) {
791
                while (left_trim_pos < str_end) {
792
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
793
                        break;
794
                    }
795
                    ++left_trim_pos;
796
                }
797
            }
798
799
25
            if constexpr (is_rtrim) {
800
75
                while (right_trim_pos > left_trim_pos) {
801
68
                    --right_trim_pos;
802
68
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
803
18
                        ++right_trim_pos;
804
18
                        break;
805
18
                    }
806
68
                }
807
25
            }
808
809
25
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
810
            // The length of the result of the trim function will never exceed the length of the input.
811
25
            res_offsets[i] = (ColumnString::Offset)res_data.size();
812
25
        }
813
814
25
        return Status::OK();
815
25
    }
816
817
    static Status impl_vectors_utf8(const ColumnString::Chars& str_data,
818
                                    const ColumnString::Offsets& str_offsets,
819
                                    const StringRef& remove_str, ColumnString::Chars& res_data,
820
53
                                    ColumnString::Offsets& res_offsets) {
821
53
        const size_t offset_size = str_offsets.size();
822
53
        res_offsets.resize(offset_size);
823
53
        res_data.reserve(str_data.size());
824
825
53
        std::unordered_set<std::string_view> char_lookup;
826
53
        const char* remove_begin = remove_str.data;
827
53
        const char* remove_end = remove_str.data + remove_str.size;
828
829
240
        while (remove_begin < remove_end) {
830
187
            size_t byte_len, char_len;
831
187
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
832
187
                    remove_begin, remove_end, 1);
833
187
            char_lookup.insert(std::string_view(remove_begin, byte_len));
834
187
            remove_begin += byte_len;
835
187
        }
836
837
140
        for (size_t i = 0; i < offset_size; ++i) {
838
87
            const char* str_begin =
839
87
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
840
87
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
841
87
            const char* left_trim_pos = str_begin;
842
87
            const char* right_trim_pos = str_end;
843
844
87
            if constexpr (is_ltrim) {
845
81
                while (left_trim_pos < str_end) {
846
73
                    size_t byte_len, char_len;
847
73
                    std::tie(byte_len, char_len) =
848
73
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
849
73
                                                                                   str_end, 1);
850
73
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
851
73
                        char_lookup.end()) {
852
52
                        break;
853
52
                    }
854
21
                    left_trim_pos += byte_len;
855
21
                }
856
60
            }
857
858
87
            if constexpr (is_rtrim) {
859
88
                while (right_trim_pos > left_trim_pos) {
860
80
                    const char* prev_char_pos = right_trim_pos;
861
156
                    do {
862
156
                        --prev_char_pos;
863
156
                    } while ((*prev_char_pos & 0xC0) == 0x80);
864
80
                    size_t byte_len = right_trim_pos - prev_char_pos;
865
80
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
866
80
                        char_lookup.end()) {
867
52
                        break;
868
52
                    }
869
28
                    right_trim_pos = prev_char_pos;
870
28
                }
871
60
            }
872
873
87
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
874
            // The length of the result of the trim function will never exceed the length of the input.
875
87
            res_offsets[i] = (ColumnString::Offset)res_data.size();
876
87
        }
877
53
        return Status::OK();
878
53
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
820
19
                                    ColumnString::Offsets& res_offsets) {
821
19
        const size_t offset_size = str_offsets.size();
822
19
        res_offsets.resize(offset_size);
823
19
        res_data.reserve(str_data.size());
824
825
19
        std::unordered_set<std::string_view> char_lookup;
826
19
        const char* remove_begin = remove_str.data;
827
19
        const char* remove_end = remove_str.data + remove_str.size;
828
829
84
        while (remove_begin < remove_end) {
830
65
            size_t byte_len, char_len;
831
65
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
832
65
                    remove_begin, remove_end, 1);
833
65
            char_lookup.insert(std::string_view(remove_begin, byte_len));
834
65
            remove_begin += byte_len;
835
65
        }
836
837
52
        for (size_t i = 0; i < offset_size; ++i) {
838
33
            const char* str_begin =
839
33
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
840
33
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
841
33
            const char* left_trim_pos = str_begin;
842
33
            const char* right_trim_pos = str_end;
843
844
33
            if constexpr (is_ltrim) {
845
45
                while (left_trim_pos < str_end) {
846
41
                    size_t byte_len, char_len;
847
41
                    std::tie(byte_len, char_len) =
848
41
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
849
41
                                                                                   str_end, 1);
850
41
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
851
41
                        char_lookup.end()) {
852
29
                        break;
853
29
                    }
854
12
                    left_trim_pos += byte_len;
855
12
                }
856
33
            }
857
858
33
            if constexpr (is_rtrim) {
859
48
                while (right_trim_pos > left_trim_pos) {
860
44
                    const char* prev_char_pos = right_trim_pos;
861
90
                    do {
862
90
                        --prev_char_pos;
863
90
                    } while ((*prev_char_pos & 0xC0) == 0x80);
864
44
                    size_t byte_len = right_trim_pos - prev_char_pos;
865
44
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
866
44
                        char_lookup.end()) {
867
29
                        break;
868
29
                    }
869
15
                    right_trim_pos = prev_char_pos;
870
15
                }
871
33
            }
872
873
33
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
874
            // The length of the result of the trim function will never exceed the length of the input.
875
33
            res_offsets[i] = (ColumnString::Offset)res_data.size();
876
33
        }
877
19
        return Status::OK();
878
19
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
820
17
                                    ColumnString::Offsets& res_offsets) {
821
17
        const size_t offset_size = str_offsets.size();
822
17
        res_offsets.resize(offset_size);
823
17
        res_data.reserve(str_data.size());
824
825
17
        std::unordered_set<std::string_view> char_lookup;
826
17
        const char* remove_begin = remove_str.data;
827
17
        const char* remove_end = remove_str.data + remove_str.size;
828
829
78
        while (remove_begin < remove_end) {
830
61
            size_t byte_len, char_len;
831
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
832
61
                    remove_begin, remove_end, 1);
833
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
834
61
            remove_begin += byte_len;
835
61
        }
836
837
44
        for (size_t i = 0; i < offset_size; ++i) {
838
27
            const char* str_begin =
839
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
840
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
841
27
            const char* left_trim_pos = str_begin;
842
27
            const char* right_trim_pos = str_end;
843
844
27
            if constexpr (is_ltrim) {
845
36
                while (left_trim_pos < str_end) {
846
32
                    size_t byte_len, char_len;
847
32
                    std::tie(byte_len, char_len) =
848
32
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
849
32
                                                                                   str_end, 1);
850
32
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
851
32
                        char_lookup.end()) {
852
23
                        break;
853
23
                    }
854
9
                    left_trim_pos += byte_len;
855
9
                }
856
27
            }
857
858
            if constexpr (is_rtrim) {
859
                while (right_trim_pos > left_trim_pos) {
860
                    const char* prev_char_pos = right_trim_pos;
861
                    do {
862
                        --prev_char_pos;
863
                    } while ((*prev_char_pos & 0xC0) == 0x80);
864
                    size_t byte_len = right_trim_pos - prev_char_pos;
865
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
866
                        char_lookup.end()) {
867
                        break;
868
                    }
869
                    right_trim_pos = prev_char_pos;
870
                }
871
            }
872
873
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
874
            // The length of the result of the trim function will never exceed the length of the input.
875
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
876
27
        }
877
17
        return Status::OK();
878
17
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
820
17
                                    ColumnString::Offsets& res_offsets) {
821
17
        const size_t offset_size = str_offsets.size();
822
17
        res_offsets.resize(offset_size);
823
17
        res_data.reserve(str_data.size());
824
825
17
        std::unordered_set<std::string_view> char_lookup;
826
17
        const char* remove_begin = remove_str.data;
827
17
        const char* remove_end = remove_str.data + remove_str.size;
828
829
78
        while (remove_begin < remove_end) {
830
61
            size_t byte_len, char_len;
831
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
832
61
                    remove_begin, remove_end, 1);
833
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
834
61
            remove_begin += byte_len;
835
61
        }
836
837
44
        for (size_t i = 0; i < offset_size; ++i) {
838
27
            const char* str_begin =
839
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
840
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
841
27
            const char* left_trim_pos = str_begin;
842
27
            const char* right_trim_pos = str_end;
843
844
            if constexpr (is_ltrim) {
845
                while (left_trim_pos < str_end) {
846
                    size_t byte_len, char_len;
847
                    std::tie(byte_len, char_len) =
848
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
849
                                                                                   str_end, 1);
850
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
851
                        char_lookup.end()) {
852
                        break;
853
                    }
854
                    left_trim_pos += byte_len;
855
                }
856
            }
857
858
27
            if constexpr (is_rtrim) {
859
40
                while (right_trim_pos > left_trim_pos) {
860
36
                    const char* prev_char_pos = right_trim_pos;
861
66
                    do {
862
66
                        --prev_char_pos;
863
66
                    } while ((*prev_char_pos & 0xC0) == 0x80);
864
36
                    size_t byte_len = right_trim_pos - prev_char_pos;
865
36
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
866
36
                        char_lookup.end()) {
867
23
                        break;
868
23
                    }
869
13
                    right_trim_pos = prev_char_pos;
870
13
                }
871
27
            }
872
873
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
874
            // The length of the result of the trim function will never exceed the length of the input.
875
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
876
27
        }
877
17
        return Status::OK();
878
17
    }
879
};
880
// This is an implementation of a parameter for the Trim function.
881
template <bool is_ltrim, bool is_rtrim, typename Name>
882
struct Trim1Impl {
883
    static constexpr auto name = Name::name;
884
885
157
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
885
45
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
885
35
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
885
41
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
885
9
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
885
13
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
885
14
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
886
887
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
888
139
                          uint32_t result, size_t input_rows_count) {
889
139
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
139
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
139
            auto col_res = ColumnString::create();
892
139
            char blank[] = " ";
893
139
            const StringRef remove_str(blank, 1);
894
139
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
139
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
139
                    col_res->get_offsets())));
897
139
            block.replace_by_position(result, std::move(col_res));
898
139
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
139
        return Status::OK();
904
139
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
48
                          uint32_t result, size_t input_rows_count) {
889
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
48
            auto col_res = ColumnString::create();
892
48
            char blank[] = " ";
893
48
            const StringRef remove_str(blank, 1);
894
48
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
48
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
48
                    col_res->get_offsets())));
897
48
            block.replace_by_position(result, std::move(col_res));
898
48
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
48
        return Status::OK();
904
48
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
37
                          uint32_t result, size_t input_rows_count) {
889
37
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
37
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
37
            auto col_res = ColumnString::create();
892
37
            char blank[] = " ";
893
37
            const StringRef remove_str(blank, 1);
894
37
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
37
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
37
                    col_res->get_offsets())));
897
37
            block.replace_by_position(result, std::move(col_res));
898
37
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
37
        return Status::OK();
904
37
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
42
                          uint32_t result, size_t input_rows_count) {
889
42
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
42
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
42
            auto col_res = ColumnString::create();
892
42
            char blank[] = " ";
893
42
            const StringRef remove_str(blank, 1);
894
42
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
42
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
42
                    col_res->get_offsets())));
897
42
            block.replace_by_position(result, std::move(col_res));
898
42
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
42
        return Status::OK();
904
42
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
1
                          uint32_t result, size_t input_rows_count) {
889
1
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
1
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
1
            auto col_res = ColumnString::create();
892
1
            char blank[] = " ";
893
1
            const StringRef remove_str(blank, 1);
894
1
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
1
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
1
                    col_res->get_offsets())));
897
1
            block.replace_by_position(result, std::move(col_res));
898
1
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
1
        return Status::OK();
904
1
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
5
                          uint32_t result, size_t input_rows_count) {
889
5
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
5
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
5
            auto col_res = ColumnString::create();
892
5
            char blank[] = " ";
893
5
            const StringRef remove_str(blank, 1);
894
5
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
5
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
5
                    col_res->get_offsets())));
897
5
            block.replace_by_position(result, std::move(col_res));
898
5
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
5
        return Status::OK();
904
5
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
6
                          uint32_t result, size_t input_rows_count) {
889
6
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
6
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
6
            auto col_res = ColumnString::create();
892
6
            char blank[] = " ";
893
6
            const StringRef remove_str(blank, 1);
894
6
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
6
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
6
                    col_res->get_offsets())));
897
6
            block.replace_by_position(result, std::move(col_res));
898
6
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
6
        return Status::OK();
904
6
    }
905
};
906
907
// This is an implementation of two parameters for the Trim function.
908
template <bool is_ltrim, bool is_rtrim, typename Name>
909
struct Trim2Impl {
910
    static constexpr auto name = Name::name;
911
912
226
    static DataTypes get_variadic_argument_types() {
913
226
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
226
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
912
20
    static DataTypes get_variadic_argument_types() {
913
20
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
20
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
912
29
    static DataTypes get_variadic_argument_types() {
913
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
912
84
    static DataTypes get_variadic_argument_types() {
913
84
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
84
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
912
27
    static DataTypes get_variadic_argument_types() {
913
27
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
27
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
912
29
    static DataTypes get_variadic_argument_types() {
913
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
912
37
    static DataTypes get_variadic_argument_types() {
913
37
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
37
    }
915
916
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
917
282
                          uint32_t result, size_t input_rows_count) {
918
282
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
282
        const auto& rcol =
920
282
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
282
                        ->get_data_column_ptr();
922
282
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
282
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
282
                auto col_res = ColumnString::create();
925
282
                const auto* remove_str_raw = col_right->get_chars().data();
926
282
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
282
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
282
                if (remove_str.size == 1) {
930
65
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
65
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
65
                            col_res->get_offsets())));
933
217
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
121
                                  std::is_same<Name, NameRTrimIn>::value) {
937
121
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
121
                                col->get_chars(), col->get_offsets(), remove_str,
939
121
                                col_res->get_chars(), col_res->get_offsets())));
940
121
                    } else {
941
96
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
96
                                col->get_chars(), col->get_offsets(), remove_str,
943
96
                                col_res->get_chars(), col_res->get_offsets())));
944
96
                    }
945
217
                }
946
282
                block.replace_by_position(result, std::move(col_res));
947
282
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
282
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
282
        return Status::OK();
959
282
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
26
                          uint32_t result, size_t input_rows_count) {
918
26
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
26
        const auto& rcol =
920
26
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
26
                        ->get_data_column_ptr();
922
26
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
26
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
26
                auto col_res = ColumnString::create();
925
26
                const auto* remove_str_raw = col_right->get_chars().data();
926
26
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
26
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
26
                if (remove_str.size == 1) {
930
2
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
2
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
2
                            col_res->get_offsets())));
933
24
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
                                  std::is_same<Name, NameRTrimIn>::value) {
937
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
                                col->get_chars(), col->get_offsets(), remove_str,
939
                                col_res->get_chars(), col_res->get_offsets())));
940
24
                    } else {
941
24
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
24
                                col->get_chars(), col->get_offsets(), remove_str,
943
24
                                col_res->get_chars(), col_res->get_offsets())));
944
24
                    }
945
24
                }
946
26
                block.replace_by_position(result, std::move(col_res));
947
26
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
26
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
26
        return Status::OK();
959
26
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
32
                          uint32_t result, size_t input_rows_count) {
918
32
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
32
        const auto& rcol =
920
32
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
32
                        ->get_data_column_ptr();
922
32
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
32
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
32
                auto col_res = ColumnString::create();
925
32
                const auto* remove_str_raw = col_right->get_chars().data();
926
32
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
32
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
32
                if (remove_str.size == 1) {
930
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
5
                            col_res->get_offsets())));
933
27
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
                                  std::is_same<Name, NameRTrimIn>::value) {
937
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
                                col->get_chars(), col->get_offsets(), remove_str,
939
                                col_res->get_chars(), col_res->get_offsets())));
940
27
                    } else {
941
27
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
27
                                col->get_chars(), col->get_offsets(), remove_str,
943
27
                                col_res->get_chars(), col_res->get_offsets())));
944
27
                    }
945
27
                }
946
32
                block.replace_by_position(result, std::move(col_res));
947
32
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
32
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
32
        return Status::OK();
959
32
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
85
                          uint32_t result, size_t input_rows_count) {
918
85
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
85
        const auto& rcol =
920
85
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
85
                        ->get_data_column_ptr();
922
85
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
85
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
85
                auto col_res = ColumnString::create();
925
85
                const auto* remove_str_raw = col_right->get_chars().data();
926
85
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
85
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
85
                if (remove_str.size == 1) {
930
40
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
40
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
40
                            col_res->get_offsets())));
933
45
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
                                  std::is_same<Name, NameRTrimIn>::value) {
937
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
                                col->get_chars(), col->get_offsets(), remove_str,
939
                                col_res->get_chars(), col_res->get_offsets())));
940
45
                    } else {
941
45
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
45
                                col->get_chars(), col->get_offsets(), remove_str,
943
45
                                col_res->get_chars(), col_res->get_offsets())));
944
45
                    }
945
45
                }
946
85
                block.replace_by_position(result, std::move(col_res));
947
85
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
85
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
85
        return Status::OK();
959
85
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
50
                          uint32_t result, size_t input_rows_count) {
918
50
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
50
        const auto& rcol =
920
50
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
50
                        ->get_data_column_ptr();
922
50
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
50
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
50
                auto col_res = ColumnString::create();
925
50
                const auto* remove_str_raw = col_right->get_chars().data();
926
50
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
50
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
50
                if (remove_str.size == 1) {
930
7
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
7
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
7
                            col_res->get_offsets())));
933
43
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
43
                                  std::is_same<Name, NameRTrimIn>::value) {
937
43
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
43
                                col->get_chars(), col->get_offsets(), remove_str,
939
43
                                col_res->get_chars(), col_res->get_offsets())));
940
                    } else {
941
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
                                col->get_chars(), col->get_offsets(), remove_str,
943
                                col_res->get_chars(), col_res->get_offsets())));
944
                    }
945
43
                }
946
50
                block.replace_by_position(result, std::move(col_res));
947
50
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
50
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
50
        return Status::OK();
959
50
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
41
                          uint32_t result, size_t input_rows_count) {
918
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
41
        const auto& rcol =
920
41
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
41
                        ->get_data_column_ptr();
922
41
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
41
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
41
                auto col_res = ColumnString::create();
925
41
                const auto* remove_str_raw = col_right->get_chars().data();
926
41
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
41
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
41
                if (remove_str.size == 1) {
930
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
5
                            col_res->get_offsets())));
933
36
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
36
                                  std::is_same<Name, NameRTrimIn>::value) {
937
36
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
36
                                col->get_chars(), col->get_offsets(), remove_str,
939
36
                                col_res->get_chars(), col_res->get_offsets())));
940
                    } else {
941
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
                                col->get_chars(), col->get_offsets(), remove_str,
943
                                col_res->get_chars(), col_res->get_offsets())));
944
                    }
945
36
                }
946
41
                block.replace_by_position(result, std::move(col_res));
947
41
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
41
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
41
        return Status::OK();
959
41
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
48
                          uint32_t result, size_t input_rows_count) {
918
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
48
        const auto& rcol =
920
48
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
48
                        ->get_data_column_ptr();
922
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
48
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
48
                auto col_res = ColumnString::create();
925
48
                const auto* remove_str_raw = col_right->get_chars().data();
926
48
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
48
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
48
                if (remove_str.size == 1) {
930
6
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
6
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
6
                            col_res->get_offsets())));
933
42
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
42
                                  std::is_same<Name, NameRTrimIn>::value) {
937
42
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
42
                                col->get_chars(), col->get_offsets(), remove_str,
939
42
                                col_res->get_chars(), col_res->get_offsets())));
940
                    } else {
941
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
                                col->get_chars(), col->get_offsets(), remove_str,
943
                                col_res->get_chars(), col_res->get_offsets())));
944
                    }
945
42
                }
946
48
                block.replace_by_position(result, std::move(col_res));
947
48
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
48
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
48
        return Status::OK();
959
48
    }
960
};
961
962
template <typename impl>
963
class FunctionTrim : public IFunction {
964
public:
965
    static constexpr auto name = impl::name;
966
395
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
966
46
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
966
36
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
966
42
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
966
21
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
966
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
966
85
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
966
10
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
966
14
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
966
15
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
966
28
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
966
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
966
38
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
967
12
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
968
969
287
    size_t get_number_of_arguments() const override {
970
287
        return get_variadic_argument_types_impl().size();
971
287
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
37
    size_t get_number_of_arguments() const override {
970
37
        return get_variadic_argument_types_impl().size();
971
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
27
    size_t get_number_of_arguments() const override {
970
27
        return get_variadic_argument_types_impl().size();
971
27
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
33
    size_t get_number_of_arguments() const override {
970
33
        return get_variadic_argument_types_impl().size();
971
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
12
    size_t get_number_of_arguments() const override {
970
12
        return get_variadic_argument_types_impl().size();
971
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
21
    size_t get_number_of_arguments() const override {
970
21
        return get_variadic_argument_types_impl().size();
971
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
76
    size_t get_number_of_arguments() const override {
970
76
        return get_variadic_argument_types_impl().size();
971
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
1
    size_t get_number_of_arguments() const override {
970
1
        return get_variadic_argument_types_impl().size();
971
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
5
    size_t get_number_of_arguments() const override {
970
5
        return get_variadic_argument_types_impl().size();
971
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
6
    size_t get_number_of_arguments() const override {
970
6
        return get_variadic_argument_types_impl().size();
971
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
19
    size_t get_number_of_arguments() const override {
970
19
        return get_variadic_argument_types_impl().size();
971
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
21
    size_t get_number_of_arguments() const override {
970
21
        return get_variadic_argument_types_impl().size();
971
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
29
    size_t get_number_of_arguments() const override {
970
29
        return get_variadic_argument_types_impl().size();
971
29
    }
972
973
287
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
287
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
287
        return arguments[0];
980
287
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
37
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
37
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
37
        return arguments[0];
980
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
27
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
27
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
27
        return arguments[0];
980
27
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
33
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
33
        return arguments[0];
980
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
12
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
12
        return arguments[0];
980
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
21
        return arguments[0];
980
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
76
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
76
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
76
        return arguments[0];
980
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
1
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
1
        return arguments[0];
980
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
5
        return arguments[0];
980
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
6
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
6
        return arguments[0];
980
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
19
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
19
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
19
        return arguments[0];
980
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
21
        return arguments[0];
980
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
29
        return arguments[0];
980
29
    }
981
    // Reports argument index 1 (the second parameter of "trim") as always constant.
982
570
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
85
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
58
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
64
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
37
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
41
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
96
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
1
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
5
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
6
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
67
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
51
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
59
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
983
984
383
    // Returns the variadic argument types by delegating to
    // impl::get_variadic_argument_types().
    DataTypes get_variadic_argument_types_impl() const override {
985
383
        return impl::get_variadic_argument_types();
986
383
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
45
    DataTypes get_variadic_argument_types_impl() const override {
985
45
        return impl::get_variadic_argument_types();
986
45
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
35
    DataTypes get_variadic_argument_types_impl() const override {
985
35
        return impl::get_variadic_argument_types();
986
35
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
41
    DataTypes get_variadic_argument_types_impl() const override {
985
41
        return impl::get_variadic_argument_types();
986
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
20
    DataTypes get_variadic_argument_types_impl() const override {
985
20
        return impl::get_variadic_argument_types();
986
20
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
29
    DataTypes get_variadic_argument_types_impl() const override {
985
29
        return impl::get_variadic_argument_types();
986
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
84
    DataTypes get_variadic_argument_types_impl() const override {
985
84
        return impl::get_variadic_argument_types();
986
84
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
9
    DataTypes get_variadic_argument_types_impl() const override {
985
9
        return impl::get_variadic_argument_types();
986
9
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
13
    DataTypes get_variadic_argument_types_impl() const override {
985
13
        return impl::get_variadic_argument_types();
986
13
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
14
    DataTypes get_variadic_argument_types_impl() const override {
985
14
        return impl::get_variadic_argument_types();
986
14
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
27
    DataTypes get_variadic_argument_types_impl() const override {
985
27
        return impl::get_variadic_argument_types();
986
27
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
29
    DataTypes get_variadic_argument_types_impl() const override {
985
29
        return impl::get_variadic_argument_types();
986
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
37
    DataTypes get_variadic_argument_types_impl() const override {
985
37
        return impl::get_variadic_argument_types();
986
37
    }
987
988
    // Executes the function by forwarding all arguments unchanged to
    // impl::execute() and returning the Status it produces.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
989
421
                        uint32_t result, size_t input_rows_count) const override {
990
421
        return impl::execute(context, block, arguments, result, input_rows_count);
991
421
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
48
                        uint32_t result, size_t input_rows_count) const override {
990
48
        return impl::execute(context, block, arguments, result, input_rows_count);
991
48
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
37
                        uint32_t result, size_t input_rows_count) const override {
990
37
        return impl::execute(context, block, arguments, result, input_rows_count);
991
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
42
                        uint32_t result, size_t input_rows_count) const override {
990
42
        return impl::execute(context, block, arguments, result, input_rows_count);
991
42
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
26
                        uint32_t result, size_t input_rows_count) const override {
990
26
        return impl::execute(context, block, arguments, result, input_rows_count);
991
26
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
32
                        uint32_t result, size_t input_rows_count) const override {
990
32
        return impl::execute(context, block, arguments, result, input_rows_count);
991
32
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
85
                        uint32_t result, size_t input_rows_count) const override {
990
85
        return impl::execute(context, block, arguments, result, input_rows_count);
991
85
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
1
                        uint32_t result, size_t input_rows_count) const override {
990
1
        return impl::execute(context, block, arguments, result, input_rows_count);
991
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
5
                        uint32_t result, size_t input_rows_count) const override {
990
5
        return impl::execute(context, block, arguments, result, input_rows_count);
991
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
6
                        uint32_t result, size_t input_rows_count) const override {
990
6
        return impl::execute(context, block, arguments, result, input_rows_count);
991
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
50
                        uint32_t result, size_t input_rows_count) const override {
990
50
        return impl::execute(context, block, arguments, result, input_rows_count);
991
50
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
41
                        uint32_t result, size_t input_rows_count) const override {
990
41
        return impl::execute(context, block, arguments, result, input_rows_count);
991
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
48
                        uint32_t result, size_t input_rows_count) const override {
990
48
        return impl::execute(context, block, arguments, result, input_rows_count);
991
48
    }
992
};
993
994
// Name tag holding the function name "unhex". It exposes the static `name`
// member that UnHexImpl<Name> reads through Name::name.
struct UnHexImplEmpty {
995
    static constexpr auto name = "unhex";
996
};
997
998
// Name tag holding the function name "unhex_null". It exposes the static `name`
// member that UnHexImpl<Name> reads through Name::name.
struct UnHexImplNull {
999
    static constexpr auto name = "unhex_null";
1000
};
1001
1002
// Hex-decoding kernel over a string column. `Name` supplies the function name
// through Name::name. Two `vector` overloads are provided: one without a null
// map (empty input rows yield empty output rows) and one that marks rows as
// null through `null_map_data`.
template <typename Name>
1003
struct UnHexImpl {
1004
    static constexpr auto name = Name::name;
1005
    using ReturnType = DataTypeString;
1006
    using ColumnType = ColumnString;
1007
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1008
1009
    // Decodes every row of (data, offsets) with string_hex::hex_decode and writes
    // the results into (dst_data, dst_offsets). Always returns Status::OK().
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1010
160
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1011
160
        auto rows_count = offsets.size();
1012
160
        dst_offsets.resize(rows_count);
1013
1014
160
        // Upper bound for the output size: each row contributes len / 2 bytes.
        int64_t total_size = 0;
1015
368
        for (size_t i = 0; i < rows_count; i++) {
1016
208
            // NOTE(review): at i == 0 this reads offsets[i - 1]; presumably the
            // Offsets container makes index -1 readable as the start of row 0 -- confirm.
            size_t len = offsets[i] - offsets[i - 1];
1017
208
            total_size += len / 2;
1018
208
        }
1019
160
        ColumnString::check_chars_length(total_size, rows_count);
1020
160
        dst_data.resize(total_size);
1021
160
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1022
160
        // Number of bytes written to dst_data so far.
        size_t offset = 0;
1023
1024
368
        // NOTE(review): `i` is int while rows_count is unsigned (the sizing loop
        // above uses size_t) -- worth aligning in the real source file.
        for (int i = 0; i < rows_count; ++i) {
1025
208
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1026
208
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1027
1028
208
            // Empty input row: emit an empty output row.
            if (UNLIKELY(srclen == 0)) {
1029
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1030
13
                continue;
1031
13
            }
1032
1033
195
            // The return value is used as the number of bytes written at dst_data_ptr + offset.
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1034
1035
195
            offset += outlen;
1036
195
            dst_offsets[i] = cast_set<uint32_t>(offset);
1037
195
        }
1038
160
        // Give back the part of the pre-sized buffer that was never written
        // (presumably pop_back(n) removes n trailing bytes -- confirm).
        dst_data.pop_back(total_size - offset);
1039
160
        return Status::OK();
1040
160
    }
1041
1042
    // Same decoding as the overload above, but rows with empty input and rows for
    // which hex_decode returns 0 are flagged in `null_map_data` (set to 1) and get
    // an empty output row. Always returns Status::OK().
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1043
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1044
33
                         ColumnUInt8::Container* null_map_data) {
1045
33
        auto rows_count = offsets.size();
1046
33
        dst_offsets.resize(rows_count);
1047
1048
33
        // Upper bound for the output size: each row contributes len / 2 bytes.
        int64_t total_size = 0;
1049
84
        for (size_t i = 0; i < rows_count; i++) {
1050
51
            size_t len = offsets[i] - offsets[i - 1];
1051
51
            total_size += len / 2;
1052
51
        }
1053
33
        ColumnString::check_chars_length(total_size, rows_count);
1054
33
        dst_data.resize(total_size);
1055
33
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1056
33
        // Number of bytes written to dst_data so far.
        size_t offset = 0;
1057
1058
84
        for (int i = 0; i < rows_count; ++i) {
1059
51
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1060
51
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1061
1062
51
            // Empty input row: mark as null and emit an empty output row.
            if (UNLIKELY(srclen == 0)) {
1063
7
                (*null_map_data)[i] = 1;
1064
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1065
7
                continue;
1066
7
            }
1067
1068
44
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1069
1070
44
            // A zero-length decode result is treated as a null row.
            if (outlen == 0) {
1071
13
                (*null_map_data)[i] = 1;
1072
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1073
13
                continue;
1074
13
            }
1075
1076
31
            offset += outlen;
1077
31
            dst_offsets[i] = cast_set<uint32_t>(offset);
1078
31
        }
1079
33
        // Give back the part of the pre-sized buffer that was never written.
        dst_data.pop_back(total_size - offset);
1080
33
        return Status::OK();
1081
33
    }
1082
};
1083
1084
// Name tag holding the function name "space".
struct NameStringSpace {
1085
    static constexpr auto name = "space";
1086
};
1087
1088
// Kernel that maps an Int32 column to a string column in which each row is
// data[i] space characters; rows with data[i] <= 0 become empty strings.
struct StringSpace {
1089
    using ReturnType = DataTypeString;
1090
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_INT;
1091
    using Type = Int32;
1092
    using ReturnColumnType = ColumnString;
1093
1094
    // Fills (res_data, res_offsets) with one run of spaces per input row.
    // Always returns Status::OK().
    static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data,
1095
10
                         ColumnString::Offsets& res_offsets) {
1096
10
        res_offsets.resize(data.size());
1097
10
        size_t input_size = res_offsets.size();
1098
10
        // Total output bytes: the sum of all positive counts.
        int64_t total_size = 0;
1099
34
        for (size_t i = 0; i < input_size; ++i) {
1100
24
            if (data[i] > 0) {
1101
14
                total_size += data[i];
1102
14
            }
1103
24
        }
1104
10
        ColumnString::check_chars_length(total_size, input_size);
1105
10
        // Reserve once so the per-row appends below do not need to grow the buffer.
        res_data.reserve(total_size);
1106
1107
34
        for (size_t i = 0; i < input_size; ++i) {
1108
24
            if (data[i] > 0) [[likely]] {
1109
14
                // Append data[i] spaces, then store the new end of res_data as row i's offset.
                res_data.resize_fill(res_data.size() + data[i], ' ');
1110
14
                cast_set(res_offsets[i], res_data.size());
1111
14
            } else {
1112
10
                // Zero or negative count: the row is an empty string.
                StringOP::push_empty_string(i, res_data, res_offsets);
1113
10
            }
1114
24
        }
1115
10
        return Status::OK();
1116
10
    }
1117
};
1118
1119
// Base64-encoding kernel over a string column (function name "to_base64").
struct ToBase64Impl {
1120
    static constexpr auto name = "to_base64";
1121
    using ReturnType = DataTypeString;
1122
    using ColumnType = ColumnString;
1123
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1124
1125
    // Encodes every row of (data, offsets) with doris::base64_encode and writes
    // the results into (dst_data, dst_offsets). Empty input rows yield empty
    // output rows. Always returns Status::OK().
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1126
167
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1127
167
        auto rows_count = offsets.size();
1128
167
        dst_offsets.resize(rows_count);
1129
1130
167
        // Pre-size the output: 4 output bytes for every started group of 3 input bytes.
        size_t total_size = 0;
1131
370
        for (size_t i = 0; i < rows_count; i++) {
1132
203
            // NOTE(review): at i == 0 this reads offsets[i - 1]; presumably the
            // Offsets container makes index -1 readable as the start of row 0 -- confirm.
            size_t len = offsets[i] - offsets[i - 1];
1133
203
            total_size += 4 * ((len + 2) / 3);
1134
203
        }
1135
167
        ColumnString::check_chars_length(total_size, rows_count);
1136
167
        dst_data.resize(total_size);
1137
167
        auto* dst_data_ptr = dst_data.data();
1138
167
        // Number of bytes written to dst_data so far.
        size_t offset = 0;
1139
1140
370
        for (int i = 0; i < rows_count; ++i) {
1141
203
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1142
203
            size_t srclen = offsets[i] - offsets[i - 1];
1143
1144
203
            // Empty input row: emit an empty output row.
            if (UNLIKELY(srclen == 0)) {
1145
10
                dst_offsets[i] = cast_set<uint32_t>(offset);
1146
10
                continue;
1147
10
            }
1148
1149
193
            // The return value is used as the number of bytes written at dst_data_ptr + offset.
            auto outlen = doris::base64_encode((const unsigned char*)source, srclen,
1150
193
                                               (unsigned char*)(dst_data_ptr + offset));
1151
1152
193
            offset += outlen;
1153
193
            dst_offsets[i] = cast_set<uint32_t>(offset);
1154
193
        }
1155
167
        // Give back the part of the pre-sized buffer that was never written.
        dst_data.pop_back(total_size - offset);
1156
167
        return Status::OK();
1157
167
    }
1158
};
1159
1160
// Base64-decoding kernel over a string column (function name "from_base64").
// Rows that cannot be decoded are flagged in the null map.
struct FromBase64Impl {
1161
    static constexpr auto name = "from_base64";
1162
    using ReturnType = DataTypeString;
1163
    using ColumnType = ColumnString;
1164
1165
    // Decodes every row of (data, offsets) with base64_decode into
    // (dst_data, dst_offsets). A row is set to null (null_map[i] = 1) when its
    // length is not a multiple of 4 or when base64_decode returns a negative
    // value. Rows already null on entry are skipped; empty rows yield empty
    // output. Always returns Status::OK().
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1166
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1167
171
                         NullMap& null_map) {
1168
171
        auto rows_count = offsets.size();
1169
171
        dst_offsets.resize(rows_count);
1170
1171
171
        // Pre-size the output: 3 output bytes for every full group of 4 input bytes.
        size_t total_size = 0;
1172
398
        for (size_t i = 0; i < rows_count; i++) {
1173
227
            // NOTE(review): at i == 0 this reads offsets[i - 1]; presumably the
            // Offsets container makes index -1 readable as the start of row 0 -- confirm.
            auto len = offsets[i] - offsets[i - 1];
1174
227
            total_size += len / 4 * 3;
1175
227
        }
1176
171
        ColumnString::check_chars_length(total_size, rows_count);
1177
171
        dst_data.resize(total_size);
1178
171
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1179
171
        // Number of bytes written to dst_data so far.
        size_t offset = 0;
1180
1181
398
        for (int i = 0; i < rows_count; ++i) {
1182
227
            // Row is already null on entry: leave it null and emit an empty output row.
            // NOTE(review): the assignment below rewrites a flag that is already set.
            if (UNLIKELY(null_map[i])) {
1183
0
                null_map[i] = 1;
1184
0
                dst_offsets[i] = cast_set<uint32_t>(offset);
1185
0
                continue;
1186
0
            }
1187
1188
227
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1189
227
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1190
1191
227
            // Empty input row: emit an empty (non-null) output row.
            if (UNLIKELY(srclen == 0)) {
1192
9
                dst_offsets[i] = cast_set<uint32_t>(offset);
1193
9
                continue;
1194
9
            }
1195
1196
218
            // Input whose length is not a multiple of 4 is rejected without decoding.
            if (UNLIKELY(srclen % 4 != 0)) {
1197
68
                null_map[i] = 1;
1198
68
                dst_offsets[i] = cast_set<uint32_t>(offset);
1199
68
                continue;
1200
68
            }
1201
1202
150
            auto outlen = base64_decode(source, srclen, dst_data_ptr + offset);
1203
1204
150
            // A negative result is treated as a decode failure: null row, no bytes consumed.
            if (outlen < 0) {
1205
4
                null_map[i] = 1;
1206
4
                dst_offsets[i] = cast_set<uint32_t>(offset);
1207
146
            } else {
1208
146
                offset += outlen;
1209
146
                dst_offsets[i] = cast_set<uint32_t>(offset);
1210
146
            }
1211
150
        }
1212
171
        // Give back the part of the pre-sized buffer that was never written.
        dst_data.pop_back(total_size - offset);
1213
171
        return Status::OK();
1214
171
    }
1215
};
1216
1217
// Kernel for "append_trailing_char_if_absent": appends the right-hand string to
// the left-hand string unless the left-hand string already ends with it.
// The right-hand string must be exactly one UTF-8 character; otherwise the
// result row is null. Three entry points cover column/column, column/constant
// and constant/column inputs.
struct StringAppendTrailingCharIfAbsent {
1218
    static constexpr auto name = "append_trailing_char_if_absent";
1219
    using Chars = ColumnString::Chars;
1220
    using Offsets = ColumnString::Offsets;
1221
    using ReturnType = DataTypeString;
1222
    using ColumnType = ColumnString;
1223
1224
48
    // Returns true when `str` ends with `end`; false when `str` is shorter than `end`.
    static bool str_end_with(const StringRef& str, const StringRef& end) {
1225
48
        if (str.size < end.size) {
1226
11
            return false;
1227
11
        }
1228
        // StringRef::end_with requires end.size <= str.size, which the check above guarantees.
1229
37
        return str.end_with(end);
1230
48
    }
1231
1232
    // Both inputs are columns: row i of the result is built from row i of
    // (ldata, loffsets) and row i of (rdata, roffsets).
    static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1233
                              const Chars& rdata, const Offsets& roffsets, Chars& res_data,
1234
56
                              Offsets& res_offsets, NullMap& null_map_data) {
1235
56
        DCHECK_EQ(loffsets.size(), roffsets.size());
1236
56
        size_t input_rows_count = loffsets.size();
1237
56
        res_offsets.resize(input_rows_count);
1238
56
        // Scratch buffer reused across rows for the concatenated result.
        fmt::memory_buffer buffer;
1239
1240
158
        for (size_t i = 0; i < input_rows_count; ++i) {
1241
102
            buffer.clear();
1242
1243
102
            // NOTE(review): at i == 0 these read offsets[i - 1]; presumably the
            // Offsets container makes index -1 readable as the start of row 0 -- confirm.
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1244
102
                                       loffsets[i] - loffsets[i - 1]);
1245
102
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1246
102
                                       roffsets[i] - roffsets[i - 1]);
1247
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1248
102
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1249
102
                    rstr.begin(), rstr.end(), 2);
1250
1251
102
            // The trailing string must be exactly one character, otherwise the row is null.
            if (char_len != 1) {
1252
66
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1253
66
                continue;
1254
66
            }
1255
36
            // Already ends with the character: emit the left string unchanged.
            if (str_end_with(lstr, rstr)) {
1256
9
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1257
9
                continue;
1258
9
            }
1259
1260
27
            // Otherwise emit lstr followed by rstr.
            buffer.append(lstr.begin(), lstr.end());
1261
27
            buffer.append(rstr.begin(), rstr.end());
1262
27
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1263
27
                                        res_offsets);
1264
27
        }
1265
56
    }
    // Left input is a column, right input is a single constant string `rstr`.
    // The one-character check on `rstr` is done once, before the row loop.
1266
    static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1267
                              const StringRef& rstr, Chars& res_data, Offsets& res_offsets,
1268
8
                              NullMap& null_map_data) {
1269
8
        size_t input_rows_count = loffsets.size();
1270
8
        res_offsets.resize(input_rows_count);
1271
8
        fmt::memory_buffer buffer;
1272
        // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1273
8
        auto [byte_len, char_len] =
1274
8
                simd::VStringFunctions::iterate_utf8_with_limit_length(rstr.begin(), rstr.end(), 2);
1275
8
        // An invalid constant makes every row null.
        if (char_len != 1) {
1276
4
            for (size_t i = 0; i < input_rows_count; ++i) {
1277
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1278
2
            }
1279
2
            return;
1280
2
        }
1281
1282
12
        for (size_t i = 0; i < input_rows_count; ++i) {
1283
6
            buffer.clear();
1284
6
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1285
6
                                       loffsets[i] - loffsets[i - 1]);
1286
1287
6
            // Already ends with the character: emit the left string unchanged.
            if (str_end_with(lstr, rstr)) {
1288
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1289
2
                continue;
1290
2
            }
1291
1292
4
            // Otherwise emit lstr followed by rstr.
            buffer.append(lstr.begin(), lstr.end());
1293
4
            buffer.append(rstr.begin(), rstr.end());
1294
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1295
4
                                        res_offsets);
1296
4
        }
1297
6
    }
    // Left input is a single constant string `lstr`, right input is a column.
    // The one-character check is done per row on the right-hand value.
1298
    static void scalar_vector(FunctionContext* context, const StringRef& lstr, const Chars& rdata,
1299
                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
1300
8
                              NullMap& null_map_data) {
1301
8
        size_t input_rows_count = roffsets.size();
1302
8
        res_offsets.resize(input_rows_count);
1303
8
        fmt::memory_buffer buffer;
1304
1305
16
        for (size_t i = 0; i < input_rows_count; ++i) {
1306
8
            buffer.clear();
1307
1308
8
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1309
8
                                       roffsets[i] - roffsets[i - 1]);
1310
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1311
8
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1312
8
                    rstr.begin(), rstr.end(), 2);
1313
1314
8
            // The trailing string must be exactly one character, otherwise the row is null.
            if (char_len != 1) {
1315
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1316
2
                continue;
1317
2
            }
1318
6
            // Already ends with the character: emit the left string unchanged.
            if (str_end_with(lstr, rstr)) {
1319
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1320
2
                continue;
1321
2
            }
1322
1323
4
            // Otherwise emit lstr followed by rstr.
            buffer.append(lstr.begin(), lstr.end());
1324
4
            buffer.append(rstr.begin(), rstr.end());
1325
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1326
4
                                        res_offsets);
1327
4
        }
1328
8
    }
1329
};
1330
1331
// Policy type for left padding: supplies the function name "lpad" and sets is_lpad to true.
// Used as the template argument of FunctionStringPad (see FunctionStringLPad).
struct StringLPad {
1332
    static constexpr auto name = "lpad";
1333
    static constexpr auto is_lpad = true;
1334
};
1335
1336
// Policy type for right padding: supplies the function name "rpad" and sets is_lpad to false.
// Used as the template argument of FunctionStringPad (see FunctionStringRPad).
struct StringRPad {
1337
    static constexpr auto name = "rpad";
1338
    static constexpr auto is_lpad = false;
1339
};
1340
1341
// Two-argument implementation aliases: each binds StringFunctionImpl to one operation type
// (StartsWithOp, EndsWithOp, FindInSetOp) while leaving the argument data types open.
template <typename LeftDataType, typename RightDataType>
1342
using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;
1343
1344
template <typename LeftDataType, typename RightDataType>
1345
using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;
1346
1347
template <typename LeftDataType, typename RightDataType>
1348
using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
1349
1350
// Function types ready for registration
1351
// Single-argument functions: each alias pairs an implementation type with its name type
// through FunctionUnaryToType. All of them are registered in register_function_string().
using FunctionStringParseDataSize = FunctionUnaryToType<ParseDataSize, NameParseDataSize>;
1352
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
1353
using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLength>;
1354
using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
1355
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
1356
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
1357
using FunctionIsValidUTF8 = FunctionUnaryToType<IsValidUTF8Impl, NameIsValidUTF8>;
1358
1359
// starts_with(str, prefix). Row evaluation is inherited from the FunctionBinaryToType base
// (instantiated with StringStartsWithImpl / NameStartsWith); this subclass adds ZoneMap range
// pruning for the shape starts_with(slot, literal).
class FunctionStringStartsWith : public FunctionBinaryToType<DataTypeString, DataTypeString,
                                                             StringStartsWithImpl, NameStartsWith> {
public:
    static FunctionPtr create() { return std::make_shared<FunctionStringStartsWith>(); }

    // Decides whether a zone can contain a string that starts with the literal prefix.
    //
    // Returns:
    //   - kNoMatch when the zone map reports no non-NULL values (has_not_null is false), when
    //     the zone's max value sorts before the prefix, or when the zone's min value is not
    //     below the next byte string after the prefix;
    //   - kMayMatch when none of the above rules the zone out;
    //   - unsupported_zonemap_filter(ctx) when the slot/literal pair cannot be extracted, the
    //     slot type is incompatible, the zone map is missing, or its range statistics are not
    //     usable for this slot type.
    //
    // The literal is read as a string without re-checking for NULL or for literal-on-left;
    // those shapes are rejected by can_evaluate_zonemap_filter. NOTE(review): presumably the
    // caller always consults can_evaluate_zonemap_filter first -- confirm.
    ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx,
                                                const VExprSPtrs& arguments) const override {
        auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments);
        // Dereferencing an empty optional is undefined behaviour; decline to prune instead of
        // relying on the caller having validated the argument shape.
        if (!slot_literal.has_value()) {
            return unsupported_zonemap_filter(ctx);
        }
        auto slot_type = expr_zonemap::fetch_compatible_slot_type(ctx, slot_literal->slot_index,
                                                                  slot_literal->slot_type);
        if (slot_type == nullptr) {
            return unsupported_zonemap_filter(ctx);
        }
        auto zone_map_ref = ctx.zone_map(slot_literal->slot_index);
        if (zone_map_ref == nullptr) {
            return unsupported_zonemap_filter(ctx);
        }
        const auto& zone_map = *zone_map_ref;
        if (!zone_map.has_not_null) {
            return ZoneMapFilterResult::kNoMatch;
        }
        if (!expr_zonemap::range_stats_usable_for_zonemap(zone_map, slot_type)) {
            return unsupported_zonemap_filter(ctx);
        }

        // Any match s satisfies prefix <= s, so a zone whose max is below the prefix is empty
        // of matches.
        const auto prefix = slot_literal->literal.as_string_view();
        auto lower = Field::create_field<TYPE_STRING>(std::string(prefix));
        if (zone_map.max_value < lower) {
            return ZoneMapFilterResult::kNoMatch;
        }
        // Any match s also satisfies s < next_prefix(prefix). When no such upper bound exists
        // (every prefix byte is 0xFF) only the lower-bound test applies.
        auto upper_prefix = _next_prefix_for_starts_with_zonemap(prefix);
        if (upper_prefix.has_value() &&
            !(zone_map.min_value < Field::create_field<TYPE_STRING>(*upper_prefix))) {
            return ZoneMapFilterResult::kNoMatch;
        }
        return ZoneMapFilterResult::kMayMatch;
    }

    // Returns true only for starts_with(slot, literal) with a non-NULL, non-empty literal
    // prefix on the right-hand side.
    bool can_evaluate_zonemap_filter(const VExprSPtrs& arguments) const override {
        auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments);
        if (!slot_literal.has_value() || slot_literal->literal_on_left) {
            return false;
        }

        // A NULL prefix makes starts_with(slot, NULL) evaluate to NULL. An empty prefix matches
        // every non-NULL string and cannot prune by range. Reject both shapes here before
        // evaluate_zonemap_filter is called.
        if (slot_literal->literal.is_null()) {
            return false;
        }

        DORIS_CHECK(slot_literal->slot_type != nullptr);
        DORIS_CHECK(slot_literal->literal_type != nullptr);
        DORIS_CHECK(is_string_type(remove_nullable(slot_literal->slot_type)->get_primitive_type()));
        DORIS_CHECK(
                is_string_type(remove_nullable(slot_literal->literal_type)->get_primitive_type()));

        const auto prefix = slot_literal->literal.as_string_view();
        return !prefix.empty();
    }

private:
    // Computes the smallest byte string that sorts after every string starting with `prefix`.
    //
    // Walks from the last byte towards the first, increments the first byte that is not 0xFF
    // and drops everything after it. Returns std::nullopt when no byte can be incremented
    // (empty prefix, or every byte is 0xFF), meaning there is no finite upper bound.
    static std::optional<std::string> _next_prefix_for_starts_with_zonemap(
            std::string_view prefix) {
        // ZoneMap string bounds are compared by bytewise Field ordering. For starts_with(s, p),
        // the safe upper bound is the next byte string after p: p <= s < next_prefix(p).
        // For example, starts_with(s, "ab") can use the range "ab" <= s < "ac".
        std::string upper(prefix);
        // Signed index so the loop terminates after inspecting position 0.
        for (auto i = static_cast<int64_t>(upper.size()) - 1; i >= 0; --i) {
            auto byte = static_cast<unsigned char>(upper[i]);
            if (byte != std::numeric_limits<unsigned char>::max()) {
                upper[i] = static_cast<char>(byte + 1);
                upper.resize(i + 1);
                return upper;
            }
        }
        return std::nullopt;
    }
};
1438
1439
// Two-string-argument functions built on FunctionBinaryToType: each alias pairs an
// implementation template with its name type.
using FunctionStringEndsWith =
1440
        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
1441
using FunctionStringInstr =
1442
        FunctionBinaryToType<DataTypeString, DataTypeString, StringInStrImpl, NameInstr>;
1443
using FunctionStringLocate =
1444
        FunctionBinaryToType<DataTypeString, DataTypeString, StringLocateImpl, NameLocate>;
1445
using FunctionStringFindInSet =
1446
        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;
1447
1448
// Functions built on FunctionStringToString (implementation type + name type).
using FunctionQuote = FunctionStringToString<NameQuoteImpl, NameQuote>;
1449
1450
using FunctionToLower = FunctionStringToString<TransferImpl<NameToLower>, NameToLower>;
1451
1452
using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameToUpper>;
1453
1454
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
1455
1456
// Hex/base64 functions. The two unhex aliases differ in the UnHexImpl policy argument
// (UnHexImplEmpty vs UnHexImplNull) and in the boolean template flag.
// NOTE(review): the flag presumably selects a nullable result -- confirm against
// FunctionStringEncode.
using FunctionUnHex = FunctionStringEncode<UnHexImpl<UnHexImplEmpty>, false>;
1457
using FunctionUnHexNullable = FunctionStringEncode<UnHexImpl<UnHexImplNull>, true>;
1458
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>;
1459
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
1460
1461
using FunctionStringAppendTrailingCharIfAbsent =
1462
        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;
1463
1464
// lpad / rpad share FunctionStringPad and differ only in the policy struct.
using FunctionStringLPad = FunctionStringPad<StringLPad>;
1465
using FunctionStringRPad = FunctionStringPad<StringRPad>;
1466
1467
// Per-category registration routines. They are only declared here (defined elsewhere) and
// are invoked from register_function_string() below.
extern void register_function_string_basic(SimpleFunctionFactory& factory);
1468
extern void register_function_string_digest(SimpleFunctionFactory& factory);
1469
extern void register_function_string_mask(SimpleFunctionFactory& factory);
1470
extern void register_function_string_misc(SimpleFunctionFactory& factory);
1471
extern void register_function_string_search(SimpleFunctionFactory& factory);
1472
extern void register_function_string_url(SimpleFunctionFactory& factory);
1473
1474
8
// Registers the string functions with `factory` in three steps:
//   1. delegates to the per-category register_function_string_* routines,
//   2. registers the function types named in this translation unit,
//   3. adds alternative names through register_alias.
void register_function_string(SimpleFunctionFactory& factory) {
1475
8
    // Step 1: per-category registration routines.
    register_function_string_basic(factory);
1476
8
    register_function_string_digest(factory);
1477
8
    register_function_string_mask(factory);
1478
8
    register_function_string_misc(factory);
1479
8
    register_function_string_search(factory);
1480
8
    register_function_string_url(factory);
1481
1482
8
    // Step 2: individual function types.
    factory.register_function<FunctionStringParseDataSize>();
1483
8
    factory.register_function<FunctionStringASCII>();
1484
8
    factory.register_function<FunctionStringLength>();
1485
8
    factory.register_function<FunctionCrc32>();
1486
8
    factory.register_function<FunctionStringUTF8Length>();
1487
8
    factory.register_function<FunctionStringSpace>();
1488
8
    factory.register_function<FunctionStringStartsWith>();
1489
8
    factory.register_function<FunctionStringEndsWith>();
1490
8
    factory.register_function<FunctionStringInstr>();
1491
8
    factory.register_function<FunctionStringFindInSet>();
1492
8
    factory.register_function<FunctionStringLocate>();
1493
8
    factory.register_function<FunctionQuote>();
1494
8
    factory.register_function<FunctionReverseCommon>();
1495
8
    factory.register_function<FunctionUnHex>();
1496
8
    factory.register_function<FunctionUnHexNullable>();
1497
8
    factory.register_function<FunctionToLower>();
1498
8
    factory.register_function<FunctionToUpper>();
1499
8
    factory.register_function<FunctionToInitcap>();
1500
8
    // Trim family: Trim1Impl and Trim2Impl instantiations for every combination of the two
    // boolean flags used here, under both the NameTrim* and NameTrim*In name types.
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrim>>>();
1501
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrim>>>();
1502
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrim>>>();
1503
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrim>>>();
1504
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrim>>>();
1505
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrim>>>();
1506
8
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrimIn>>>();
1507
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrimIn>>>();
1508
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrimIn>>>();
1509
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrimIn>>>();
1510
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrimIn>>>();
1511
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrimIn>>>();
1512
8
    factory.register_function<FunctionStringConcat>();
1513
8
    factory.register_function<FunctionStringElt>();
1514
8
    factory.register_function<FunctionStringConcatWs>();
1515
8
    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
1516
8
    factory.register_function<FunctionStringRepeat>();
1517
8
    factory.register_function<FunctionStringLPad>();
1518
8
    factory.register_function<FunctionStringRPad>();
1519
8
    factory.register_function<FunctionToBase64>();
1520
8
    factory.register_function<FunctionFromBase64>();
1521
8
    // Money-format family: one instantiation per numeric implementation.
    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
1522
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
1523
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
1524
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMALV2>>>();
1525
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL32>>>();
1526
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL64>>>();
1527
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL128I>>>();
1528
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL256>>>();
1529
8
    // Format-round family: same set of numeric implementations as the money-format family.
    factory.register_function<FunctionStringFormatRound<FormatRoundDoubleImpl>>();
1530
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt64Impl>>();
1531
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt128Impl>>();
1532
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMALV2>>>();
1533
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL32>>>();
1534
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL64>>>();
1535
8
    factory.register_function<
1536
8
            FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL128I>>>();
1537
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL256>>>();
1538
8
    factory.register_function<FunctionReplace<ReplaceImpl, true>>();
1539
8
    factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
1540
8
    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
1541
8
    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
1542
8
    factory.register_function<FunctionOverlay>();
1543
8
    factory.register_function<FunctionIsValidUTF8>();
1544
1545
8
    // Step 3: aliases. Each call passes a registered function's `name` and an additional
    // spelling for it.
    factory.register_alias(FunctionIsValidUTF8::name, "isValidUTF8");
1546
8
    factory.register_alias(FunctionToLower::name, "lcase");
1547
8
    factory.register_alias(FunctionToUpper::name, "ucase");
1548
8
    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
1549
8
    factory.register_alias(FunctionStringLength::name, "octet_length");
1550
8
    factory.register_alias(FunctionOverlay::name, "insert");
1551
8
}
1552
1553
} // namespace doris