Coverage Report

Created: 2026-07-05 02:38

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <ctype.h>
19
#include <math.h>
20
#include <re2/stringpiece.h>
21
#include <unicode/schriter.h>
22
#include <unicode/uchar.h>
23
#include <unicode/unistr.h>
24
#include <unicode/ustream.h>
25
26
#include <bitset>
27
#include <compare>
28
#include <cstddef>
29
#include <cstdint>
30
#include <limits>
31
#include <optional>
32
#include <string>
33
#include <string_view>
34
35
#include "common/cast_set.h"
36
#include "common/check.h"
37
#include "common/logging.h"
38
#include "common/status.h"
39
#include "core/column/column.h"
40
#include "core/column/column_string.h"
41
#include "core/data_type/data_type_nullable.h"
42
#include "core/pod_array_fwd.h"
43
#include "core/string_ref.h"
44
#include "exprs/expr_zonemap_filter.h"
45
#include "exprs/function/function_reverse.h"
46
#include "exprs/function/function_string_concat.h"
47
#include "exprs/function/function_string_format.h"
48
#include "exprs/function/function_string_replace.h"
49
#include "exprs/function/function_string_to_string.h"
50
#include "exprs/function/function_totype.h"
51
#include "exprs/function/simple_function_factory.h"
52
#include "exprs/function/string_hex_util.h"
53
#include "util/string_search.hpp"
54
#include "util/url_coding.h"
55
#include "util/utf8_check.h"
56
57
namespace doris {
58
struct NameStringASCII {
59
    static constexpr auto name = "ascii";
60
};
61
62
struct StringASCII {
63
    using ReturnType = DataTypeInt32;
64
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
65
    using Type = String;
66
    using ReturnColumnType = ColumnInt32;
67
68
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
69
38
                         PaddedPODArray<Int32>& res) {
70
38
        auto size = offsets.size();
71
38
        res.resize(size);
72
86
        for (int i = 0; i < size; ++i) {
73
48
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
74
48
            res[i] = (offsets[i] == offsets[i - 1]) ? 0 : static_cast<uint8_t>(raw_str[0]);
75
48
        }
76
38
        return Status::OK();
77
38
    }
78
};
79
80
struct NameParseDataSize {
81
    static constexpr auto name = "parse_data_size";
82
};
83
84
static const std::map<std::string_view, Int128> UNITS = {
85
        {"B", static_cast<Int128>(1)},        {"kB", static_cast<Int128>(1) << 10},
86
        {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30},
87
        {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50},
88
        {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70},
89
        {"YB", static_cast<Int128>(1) << 80}};
90
91
struct ParseDataSize {
92
    using ReturnType = DataTypeInt128;
93
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
94
    using Type = String;
95
    using ReturnColumnType = ColumnInt128;
96
97
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
98
48
                         PaddedPODArray<Int128>& res) {
99
48
        auto size = offsets.size();
100
48
        res.resize(size);
101
100
        for (int i = 0; i < size; ++i) {
102
52
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
103
52
            int str_size = offsets[i] - offsets[i - 1];
104
52
            res[i] = parse_data_size(std::string_view(raw_str, str_size));
105
52
        }
106
48
        return Status::OK();
107
48
    }
108
109
52
    static Int128 parse_data_size(const std::string_view& dataSize) {
110
52
        int digit_length = 0;
111
216
        for (char c : dataSize) {
112
216
            if (isdigit(c) || c == '.') {
113
166
                digit_length++;
114
166
            } else {
115
50
                break;
116
50
            }
117
216
        }
118
119
52
        if (digit_length == 0) {
120
4
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
121
4
                                   "Invalid Input argument \"{}\" of function parse_data_size",
122
4
                                   dataSize);
123
4
        }
124
        // 123.45MB--->123.45 : MB
125
48
        double value = 0.0;
126
48
        try {
127
48
            value = std::stod(std::string(dataSize.substr(0, digit_length)));
128
48
        } catch (const std::exception& e) {
129
0
            throw doris::Exception(
130
0
                    ErrorCode::INVALID_ARGUMENT,
131
0
                    "Invalid Input argument \"{}\" of function parse_data_size, error: {}",
132
0
                    dataSize, e.what());
133
0
        }
134
48
        auto unit = dataSize.substr(digit_length);
135
48
        auto it = UNITS.find(unit);
136
48
        if (it != UNITS.end()) {
137
45
            return static_cast<__int128>(static_cast<long double>(it->second) * value);
138
45
        } else {
139
3
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
140
3
                                   "Invalid Input argument \"{}\" of function parse_data_size",
141
3
                                   dataSize);
142
3
        }
143
48
    }
144
};
145
146
struct NameQuote {
147
    static constexpr auto name = "quote";
148
};
149
150
struct NameQuoteImpl {
151
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
152
17
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
153
17
        size_t offset_size = offsets.size();
154
17
        ColumnString::Offset pos = 0;
155
17
        res_offsets.resize(offset_size);
156
17
        res_data.resize(data.size() + offset_size * 2);
157
45
        for (int i = 0; i < offset_size; i++) {
158
28
            const unsigned char* raw_str = &data[offsets[i - 1]];
159
28
            ColumnString::Offset size = offsets[i] - offsets[i - 1];
160
28
            res_data[pos] = '\'';
161
28
            std::memcpy(res_data.data() + pos + 1, raw_str, size);
162
28
            res_data[pos + size + 1] = '\'';
163
28
            pos += size + 2;
164
28
            res_offsets[i] = pos;
165
28
        }
166
17
        return Status::OK();
167
17
    }
168
};
169
170
struct NameStringLength {
171
    static constexpr auto name = "length";
172
};
173
174
struct StringLengthImpl {
175
    using ReturnType = DataTypeInt32;
176
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
177
    using Type = String;
178
    using ReturnColumnType = ColumnInt32;
179
180
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
181
714
                         PaddedPODArray<Int32>& res) {
182
714
        auto size = offsets.size();
183
714
        res.resize(size);
184
288k
        for (int i = 0; i < size; ++i) {
185
287k
            int str_size = offsets[i] - offsets[i - 1];
186
287k
            res[i] = str_size;
187
287k
        }
188
714
        return Status::OK();
189
714
    }
190
};
191
192
struct NameCrc32 {
193
    static constexpr auto name = "crc32";
194
};
195
196
struct Crc32Impl {
197
    using ReturnType = DataTypeInt64;
198
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
199
    using Type = String;
200
    using ReturnColumnType = ColumnInt64;
201
202
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
203
3
                         PaddedPODArray<Int64>& res) {
204
3
        auto size = offsets.size();
205
3
        res.resize(size);
206
6
        for (int i = 0; i < size; ++i) {
207
3
            res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1],
208
3
                             offsets[i] - offsets[i - 1]);
209
3
        }
210
3
        return Status::OK();
211
3
    }
212
};
213
214
struct NameStringUtf8Length {
215
    static constexpr auto name = "char_length";
216
};
217
218
struct StringUtf8LengthImpl {
219
    using ReturnType = DataTypeInt32;
220
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
221
    using Type = String;
222
    using ReturnColumnType = ColumnInt32;
223
224
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
225
48
                         PaddedPODArray<Int32>& res) {
226
48
        auto size = offsets.size();
227
48
        res.resize(size);
228
110
        for (int i = 0; i < size; ++i) {
229
62
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
230
62
            int str_size = offsets[i] - offsets[i - 1];
231
62
            res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size);
232
62
        }
233
48
        return Status::OK();
234
48
    }
235
};
236
237
struct NameIsValidUTF8 {
238
    static constexpr auto name = "is_valid_utf8";
239
};
240
241
struct IsValidUTF8Impl {
242
    using ReturnType = DataTypeUInt8;
243
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
244
    using Type = String;
245
    using ReturnColumnType = ColumnUInt8;
246
247
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
248
39
                         PaddedPODArray<UInt8>& res) {
249
39
        auto size = offsets.size();
250
39
        res.resize(size);
251
98
        for (size_t i = 0; i < size; ++i) {
252
59
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
253
59
            size_t str_size = offsets[i] - offsets[i - 1];
254
59
            res[i] = validate_utf8(raw_str, str_size) ? 1 : 0;
255
59
        }
256
39
        return Status::OK();
257
39
    }
258
};
259
260
struct NameStartsWith {
261
    static constexpr auto name = "starts_with";
262
};
263
264
struct StartsWithOp {
265
    using ResultDataType = DataTypeUInt8;
266
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
267
268
84
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
269
84
        res = strl.starts_with(strr);
270
84
    }
271
};
272
273
struct NameEndsWith {
274
    static constexpr auto name = "ends_with";
275
};
276
277
struct EndsWithOp {
278
    using ResultDataType = DataTypeUInt8;
279
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
280
281
92
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
282
92
        res = strl.ends_with(strr);
283
92
    }
284
};
285
286
struct NameFindInSet {
287
    static constexpr auto name = "find_in_set";
288
};
289
290
struct FindInSetOp {
291
    using ResultDataType = DataTypeInt32;
292
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
293
120
    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
294
273
        for (const auto& c : strl) {
295
273
            if (c == ',') {
296
21
                res = 0;
297
21
                return;
298
21
            }
299
273
        }
300
301
99
        int32_t token_index = 1;
302
99
        int32_t start = 0;
303
99
        int32_t end;
304
305
203
        do {
306
203
            end = start;
307
            // Position end.
308
610
            while (end < strr.length() && strr[end] != ',') {
309
407
                ++end;
310
407
            }
311
312
203
            if (strl == std::string_view {strr.data() + start, (size_t)end - start}) {
313
43
                res = token_index;
314
43
                return;
315
43
            }
316
317
            // Re-position start and end past ','
318
160
            start = end + 1;
319
160
            ++token_index;
320
160
        } while (start < strr.length());
321
56
        res = 0;
322
56
    }
323
};
324
325
struct NameInstr {
326
    static constexpr auto name = "instr";
327
};
328
329
// LeftDataType and RightDataType are DataTypeString
330
template <typename LeftDataType, typename RightDataType>
331
struct StringInStrImpl {
332
    using ResultDataType = DataTypeInt32;
333
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
334
335
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
336
72
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
337
72
        StringRef lstr_ref(ldata.data, ldata.size);
338
339
72
        auto size = roffsets.size();
340
72
        res.resize(size);
341
144
        for (int i = 0; i < size; ++i) {
342
72
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
343
72
            int r_str_size = roffsets[i] - roffsets[i - 1];
344
345
72
            StringRef rstr_ref(r_raw_str, r_str_size);
346
347
72
            res[i] = execute(lstr_ref, rstr_ref);
348
72
        }
349
350
72
        return Status::OK();
351
72
    }
352
353
    static Status vector_scalar(const ColumnString::Chars& ldata,
354
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
355
82
                                ResultPaddedPODArray& res) {
356
82
        auto size = loffsets.size();
357
82
        res.resize(size);
358
359
82
        if (rdata.size == 0) {
360
12
            std::fill(res.begin(), res.end(), 1);
361
12
            return Status::OK();
362
12
        }
363
364
70
        const UInt8* begin = ldata.data();
365
70
        const UInt8* end = begin + ldata.size();
366
70
        const UInt8* pos = begin;
367
368
        /// Current index in the array of strings.
369
70
        size_t i = 0;
370
70
        std::fill(res.begin(), res.end(), 0);
371
372
70
        StringRef rstr_ref(rdata.data, rdata.size);
373
70
        StringSearch search(&rstr_ref);
374
375
86
        while (pos < end) {
376
            // search return matched substring start offset
377
60
            pos = (UInt8*)search.search((char*)pos, end - pos);
378
60
            if (pos >= end) {
379
44
                break;
380
44
            }
381
382
            /// Determine which index it refers to.
383
            /// begin + value_offsets[i] is the start offset of string at i+1
384
16
            while (begin + loffsets[i] < pos) {
385
0
                ++i;
386
0
            }
387
388
            /// We check that the entry does not pass through the boundaries of strings.
389
16
            if (pos + rdata.size <= begin + loffsets[i]) {
390
16
                int loc = (int)(pos - begin) - loffsets[i - 1];
391
16
                int l_str_size = loffsets[i] - loffsets[i - 1];
392
16
                auto len = std::min(l_str_size, loc);
393
16
                loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len);
394
16
                res[i] = loc + 1;
395
16
            }
396
397
            // move to next string offset
398
16
            pos = begin + loffsets[i];
399
16
            ++i;
400
16
        }
401
402
70
        return Status::OK();
403
82
    }
404
405
    static Status vector_vector(const ColumnString::Chars& ldata,
406
                                const ColumnString::Offsets& loffsets,
407
                                const ColumnString::Chars& rdata,
408
176
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
409
176
        DCHECK_EQ(loffsets.size(), roffsets.size());
410
411
176
        auto size = loffsets.size();
412
176
        res.resize(size);
413
463
        for (int i = 0; i < size; ++i) {
414
287
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
415
287
            int l_str_size = loffsets[i] - loffsets[i - 1];
416
287
            StringRef lstr_ref(l_raw_str, l_str_size);
417
418
287
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
419
287
            int r_str_size = roffsets[i] - roffsets[i - 1];
420
287
            StringRef rstr_ref(r_raw_str, r_str_size);
421
422
287
            res[i] = execute(lstr_ref, rstr_ref);
423
287
        }
424
425
176
        return Status::OK();
426
176
    }
427
428
359
    static int execute(const StringRef& strl, const StringRef& strr) {
429
359
        if (strr.size == 0) {
430
74
            return 1;
431
74
        }
432
433
285
        StringSearch search(&strr);
434
        // Hive returns positions starting from 1.
435
285
        int loc = search.search(&strl);
436
285
        if (loc > 0) {
437
59
            int len = std::min(loc, (int)strl.size);
438
59
            loc = simd::VStringFunctions::get_char_len(strl.data, len);
439
59
        }
440
441
285
        return loc + 1;
442
359
    }
443
};
444
445
// the same impl as instr
446
struct NameLocate {
447
    static constexpr auto name = "locate";
448
};
449
450
// LeftDataType and RightDataType are DataTypeString
451
template <typename LeftDataType, typename RightDataType>
452
struct StringLocateImpl {
453
    using ResultDataType = DataTypeInt32;
454
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
455
456
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
457
36
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
458
36
        return StringInStrImpl<LeftDataType, RightDataType>::vector_scalar(rdata, roffsets, ldata,
459
36
                                                                           res);
460
36
    }
461
462
    static Status vector_scalar(const ColumnString::Chars& ldata,
463
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
464
36
                                ResultPaddedPODArray& res) {
465
36
        return StringInStrImpl<LeftDataType, RightDataType>::scalar_vector(rdata, ldata, loffsets,
466
36
                                                                           res);
467
36
    }
468
469
    static Status vector_vector(const ColumnString::Chars& ldata,
470
                                const ColumnString::Offsets& loffsets,
471
                                const ColumnString::Chars& rdata,
472
92
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
473
92
        return StringInStrImpl<LeftDataType, RightDataType>::vector_vector(rdata, roffsets, ldata,
474
92
                                                                           loffsets, res);
475
92
    }
476
};
477
478
// LeftDataType and RightDataType are DataTypeString
479
template <typename LeftDataType, typename RightDataType, typename OP>
480
struct StringFunctionImpl {
481
    using ResultDataType = typename OP::ResultDataType;
482
    using ResultPaddedPODArray = typename OP::ResultPaddedPODArray;
483
484
    static Status vector_vector(const ColumnString::Chars& ldata,
485
                                const ColumnString::Offsets& loffsets,
486
                                const ColumnString::Chars& rdata,
487
164
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
488
164
        DCHECK_EQ(loffsets.size(), roffsets.size());
489
490
164
        auto size = loffsets.size();
491
164
        res.resize(size);
492
376
        for (int i = 0; i < size; ++i) {
493
212
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
494
212
            int l_str_size = loffsets[i] - loffsets[i - 1];
495
496
212
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
497
212
            int r_str_size = roffsets[i] - roffsets[i - 1];
498
499
212
            std::string_view lview(l_raw_str, l_str_size);
500
212
            std::string_view rview(r_raw_str, r_str_size);
501
502
212
            OP::execute(lview, rview, res[i]);
503
212
        }
504
164
        return Status::OK();
505
164
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
487
71
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
488
71
        DCHECK_EQ(loffsets.size(), roffsets.size());
489
490
71
        auto size = loffsets.size();
491
71
        res.resize(size);
492
147
        for (int i = 0; i < size; ++i) {
493
76
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
494
76
            int l_str_size = loffsets[i] - loffsets[i - 1];
495
496
76
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
497
76
            int r_str_size = roffsets[i] - roffsets[i - 1];
498
499
76
            std::string_view lview(l_raw_str, l_str_size);
500
76
            std::string_view rview(r_raw_str, r_str_size);
501
502
76
            OP::execute(lview, rview, res[i]);
503
76
        }
504
71
        return Status::OK();
505
71
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
487
45
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
488
45
        DCHECK_EQ(loffsets.size(), roffsets.size());
489
490
45
        auto size = loffsets.size();
491
45
        res.resize(size);
492
109
        for (int i = 0; i < size; ++i) {
493
64
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
494
64
            int l_str_size = loffsets[i] - loffsets[i - 1];
495
496
64
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
497
64
            int r_str_size = roffsets[i] - roffsets[i - 1];
498
499
64
            std::string_view lview(l_raw_str, l_str_size);
500
64
            std::string_view rview(r_raw_str, r_str_size);
501
502
64
            OP::execute(lview, rview, res[i]);
503
64
        }
504
45
        return Status::OK();
505
45
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
487
48
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
488
48
        DCHECK_EQ(loffsets.size(), roffsets.size());
489
490
48
        auto size = loffsets.size();
491
48
        res.resize(size);
492
120
        for (int i = 0; i < size; ++i) {
493
72
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
494
72
            int l_str_size = loffsets[i] - loffsets[i - 1];
495
496
72
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
497
72
            int r_str_size = roffsets[i] - roffsets[i - 1];
498
499
72
            std::string_view lview(l_raw_str, l_str_size);
500
72
            std::string_view rview(r_raw_str, r_str_size);
501
502
72
            OP::execute(lview, rview, res[i]);
503
72
        }
504
48
        return Status::OK();
505
48
    }
506
    static Status vector_scalar(const ColumnString::Chars& ldata,
507
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
508
34
                                ResultPaddedPODArray& res) {
509
34
        auto size = loffsets.size();
510
34
        res.resize(size);
511
34
        std::string_view rview(rdata.data, rdata.size);
512
68
        for (int i = 0; i < size; ++i) {
513
34
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
514
34
            int l_str_size = loffsets[i] - loffsets[i - 1];
515
34
            std::string_view lview(l_raw_str, l_str_size);
516
517
34
            OP::execute(lview, rview, res[i]);
518
34
        }
519
34
        return Status::OK();
520
34
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
508
4
                                ResultPaddedPODArray& res) {
509
4
        auto size = loffsets.size();
510
4
        res.resize(size);
511
4
        std::string_view rview(rdata.data, rdata.size);
512
8
        for (int i = 0; i < size; ++i) {
513
4
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
514
4
            int l_str_size = loffsets[i] - loffsets[i - 1];
515
4
            std::string_view lview(l_raw_str, l_str_size);
516
517
4
            OP::execute(lview, rview, res[i]);
518
4
        }
519
4
        return Status::OK();
520
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
508
14
                                ResultPaddedPODArray& res) {
509
14
        auto size = loffsets.size();
510
14
        res.resize(size);
511
14
        std::string_view rview(rdata.data, rdata.size);
512
28
        for (int i = 0; i < size; ++i) {
513
14
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
514
14
            int l_str_size = loffsets[i] - loffsets[i - 1];
515
14
            std::string_view lview(l_raw_str, l_str_size);
516
517
14
            OP::execute(lview, rview, res[i]);
518
14
        }
519
14
        return Status::OK();
520
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
508
16
                                ResultPaddedPODArray& res) {
509
16
        auto size = loffsets.size();
510
16
        res.resize(size);
511
16
        std::string_view rview(rdata.data, rdata.size);
512
32
        for (int i = 0; i < size; ++i) {
513
16
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
514
16
            int l_str_size = loffsets[i] - loffsets[i - 1];
515
16
            std::string_view lview(l_raw_str, l_str_size);
516
517
16
            OP::execute(lview, rview, res[i]);
518
16
        }
519
16
        return Status::OK();
520
16
    }
521
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
522
44
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
523
44
        auto size = roffsets.size();
524
44
        res.resize(size);
525
44
        std::string_view lview(ldata.data, ldata.size);
526
94
        for (int i = 0; i < size; ++i) {
527
50
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
528
50
            int r_str_size = roffsets[i] - roffsets[i - 1];
529
50
            std::string_view rview(r_raw_str, r_str_size);
530
531
50
            OP::execute(lview, rview, res[i]);
532
50
        }
533
44
        return Status::OK();
534
44
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
522
4
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
523
4
        auto size = roffsets.size();
524
4
        res.resize(size);
525
4
        std::string_view lview(ldata.data, ldata.size);
526
8
        for (int i = 0; i < size; ++i) {
527
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
528
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
529
4
            std::string_view rview(r_raw_str, r_str_size);
530
531
4
            OP::execute(lview, rview, res[i]);
532
4
        }
533
4
        return Status::OK();
534
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
522
14
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
523
14
        auto size = roffsets.size();
524
14
        res.resize(size);
525
14
        std::string_view lview(ldata.data, ldata.size);
526
28
        for (int i = 0; i < size; ++i) {
527
14
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
528
14
            int r_str_size = roffsets[i] - roffsets[i - 1];
529
14
            std::string_view rview(r_raw_str, r_str_size);
530
531
14
            OP::execute(lview, rview, res[i]);
532
14
        }
533
14
        return Status::OK();
534
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERNS7_IiLm4096ESA_Lm16ELm15EEE
Line
Count
Source
522
26
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
523
26
        auto size = roffsets.size();
524
26
        res.resize(size);
525
26
        std::string_view lview(ldata.data, ldata.size);
526
58
        for (int i = 0; i < size; ++i) {
527
32
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
528
32
            int r_str_size = roffsets[i] - roffsets[i - 1];
529
32
            std::string_view rview(r_raw_str, r_str_size);
530
531
32
            OP::execute(lview, rview, res[i]);
532
32
        }
533
26
        return Status::OK();
534
26
    }
535
};
536
537
struct NameToLower {
538
    static constexpr auto name = "lower";
539
};
540
541
struct NameToUpper {
542
    static constexpr auto name = "upper";
543
};
544
545
template <typename OpName>
546
struct TransferImpl {
547
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
548
250
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
549
250
        size_t offset_size = offsets.size();
550
250
        if (UNLIKELY(!offset_size)) {
551
0
            return Status::OK();
552
0
        }
553
554
250
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
555
250
        res_offsets.resize(offset_size);
556
250
        if (is_ascii) {
557
188
            memcpy_small_allow_read_write_overflow15(
558
188
                    res_offsets.data(), offsets.data(),
559
188
                    offset_size * sizeof(ColumnString::Offsets::value_type));
560
561
188
            size_t data_length = data.size();
562
188
            res_data.resize(data_length);
563
188
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
564
53
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
565
135
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
566
135
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
567
135
            }
568
188
        } else {
569
62
            execute_utf8(data, offsets, res_data, res_offsets);
570
62
        }
571
572
250
        return Status::OK();
573
250
    }
_ZN5doris12TransferImplINS_11NameToLowerEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
548
157
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
549
157
        size_t offset_size = offsets.size();
550
157
        if (UNLIKELY(!offset_size)) {
551
0
            return Status::OK();
552
0
        }
553
554
157
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
555
157
        res_offsets.resize(offset_size);
556
157
        if (is_ascii) {
557
135
            memcpy_small_allow_read_write_overflow15(
558
135
                    res_offsets.data(), offsets.data(),
559
135
                    offset_size * sizeof(ColumnString::Offsets::value_type));
560
561
135
            size_t data_length = data.size();
562
135
            res_data.resize(data_length);
563
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
564
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
565
135
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
566
135
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
567
135
            }
568
135
        } else {
569
22
            execute_utf8(data, offsets, res_data, res_offsets);
570
22
        }
571
572
157
        return Status::OK();
573
157
    }
_ZN5doris12TransferImplINS_11NameToUpperEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
548
93
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
549
93
        size_t offset_size = offsets.size();
550
93
        if (UNLIKELY(!offset_size)) {
551
0
            return Status::OK();
552
0
        }
553
554
93
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
555
93
        res_offsets.resize(offset_size);
556
93
        if (is_ascii) {
557
53
            memcpy_small_allow_read_write_overflow15(
558
53
                    res_offsets.data(), offsets.data(),
559
53
                    offset_size * sizeof(ColumnString::Offsets::value_type));
560
561
53
            size_t data_length = data.size();
562
53
            res_data.resize(data_length);
563
53
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
564
53
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
565
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
566
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
567
            }
568
53
        } else {
569
40
            execute_utf8(data, offsets, res_data, res_offsets);
570
40
        }
571
572
93
        return Status::OK();
573
93
    }
574
575
    static void execute_utf8(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
576
62
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
577
62
        std::string result;
578
202
        for (int64_t i = 0; i < offsets.size(); ++i) {
579
140
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
580
140
            uint32_t size = offsets[i] - offsets[i - 1];
581
582
140
            result.clear();
583
140
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
584
92
                to_upper_utf8(begin, size, result);
585
92
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
586
48
                to_lower_utf8(begin, size, result);
587
48
            }
588
140
            StringOP::push_value_string(result, i, res_data, res_offsets);
589
140
        }
590
62
    }
_ZN5doris12TransferImplINS_11NameToLowerEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
576
22
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
577
22
        std::string result;
578
70
        for (int64_t i = 0; i < offsets.size(); ++i) {
579
48
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
580
48
            uint32_t size = offsets[i] - offsets[i - 1];
581
582
48
            result.clear();
583
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
584
                to_upper_utf8(begin, size, result);
585
48
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
586
48
                to_lower_utf8(begin, size, result);
587
48
            }
588
48
            StringOP::push_value_string(result, i, res_data, res_offsets);
589
48
        }
590
22
    }
_ZN5doris12TransferImplINS_11NameToUpperEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
576
40
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
577
40
        std::string result;
578
132
        for (int64_t i = 0; i < offsets.size(); ++i) {
579
92
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
580
92
            uint32_t size = offsets[i] - offsets[i - 1];
581
582
92
            result.clear();
583
92
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
584
92
                to_upper_utf8(begin, size, result);
585
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
586
                to_lower_utf8(begin, size, result);
587
            }
588
92
            StringOP::push_value_string(result, i, res_data, res_offsets);
589
92
        }
590
40
    }
591
592
92
    static void to_upper_utf8(const char* data, uint32_t size, std::string& result) {
593
92
        icu::StringPiece sp;
594
92
        sp.set(data, size);
595
92
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
596
92
        unicode_str.toUpper();
597
92
        unicode_str.toUTF8String(result);
598
92
    }
599
600
48
    static void to_lower_utf8(const char* data, uint32_t size, std::string& result) {
601
48
        icu::StringPiece sp;
602
48
        sp.set(data, size);
603
48
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
604
48
        unicode_str.toLower();
605
48
        unicode_str.toUTF8String(result);
606
48
    }
607
};
608
609
// Capitalize first letter
610
struct NameToInitcap {
611
    static constexpr auto name = "initcap";
612
};
613
614
struct InitcapImpl {
615
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
616
163
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
617
163
        res_offsets.resize(offsets.size());
618
619
163
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
620
163
        if (is_ascii) {
621
105
            impl_vectors_ascii(data, offsets, res_data, res_offsets);
622
105
        } else {
623
58
            impl_vectors_utf8(data, offsets, res_data, res_offsets);
624
58
        }
625
163
        return Status::OK();
626
163
    }
627
628
    static void impl_vectors_ascii(const ColumnString::Chars& data,
629
                                   const ColumnString::Offsets& offsets,
630
                                   ColumnString::Chars& res_data,
631
105
                                   ColumnString::Offsets& res_offsets) {
632
105
        size_t offset_size = offsets.size();
633
105
        memcpy_small_allow_read_write_overflow15(
634
105
                res_offsets.data(), offsets.data(),
635
105
                offset_size * sizeof(ColumnString::Offsets::value_type));
636
637
105
        size_t data_length = data.size();
638
105
        res_data.resize(data_length);
639
105
        simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
640
641
105
        bool need_capitalize = true;
642
210
        for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) {
643
105
            auto end_index = res_offsets[offset_index];
644
105
            need_capitalize = true;
645
646
1.29k
            for (size_t i = start_index; i < end_index; ++i) {
647
1.19k
                if (!::isalnum(res_data[i])) {
648
211
                    need_capitalize = true;
649
980
                } else if (need_capitalize) {
650
                    /*
651
                    https://en.cppreference.com/w/cpp/string/byte/toupper
652
                    Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. 
653
                    To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char:
654
                    char my_toupper(char ch)
655
                    {
656
                        return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
657
                    }
658
                    */
659
235
                    res_data[i] = static_cast<unsigned char>(::toupper(res_data[i]));
660
235
                    need_capitalize = false;
661
235
                }
662
1.19k
            }
663
664
105
            start_index = end_index;
665
105
        }
666
105
    }
667
668
    static void impl_vectors_utf8(const ColumnString::Chars& data,
669
                                  const ColumnString::Offsets& offsets,
670
                                  ColumnString::Chars& res_data,
671
58
                                  ColumnString::Offsets& res_offsets) {
672
58
        std::string result;
673
123
        for (int64_t i = 0; i < offsets.size(); ++i) {
674
65
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
675
65
            uint32_t size = offsets[i] - offsets[i - 1];
676
65
            result.clear();
677
65
            to_initcap_utf8(begin, size, result);
678
65
            StringOP::push_value_string(result, i, res_data, res_offsets);
679
65
        }
680
58
    }
681
682
65
    static void to_initcap_utf8(const char* data, uint32_t size, std::string& result) {
683
65
        icu::StringPiece sp;
684
65
        sp.set(data, size);
685
65
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
686
65
        unicode_str.toLower();
687
65
        icu::UnicodeString output_str;
688
65
        bool need_capitalize = true;
689
65
        icu::StringCharacterIterator iter(unicode_str);
690
623
        for (UChar32 ch = iter.first32(); ch != icu::CharacterIterator::DONE; ch = iter.next32()) {
691
558
            if (!u_isalnum(ch)) {
692
95
                need_capitalize = true;
693
463
            } else if (need_capitalize) {
694
86
                ch = u_toupper(ch);
695
86
                need_capitalize = false;
696
86
            }
697
558
            output_str.append(ch);
698
558
        }
699
65
        output_str.toUTF8String(result);
700
65
    }
701
};
702
703
struct NameTrim {
704
    static constexpr auto name = "trim";
705
};
706
struct NameLTrim {
707
    static constexpr auto name = "ltrim";
708
};
709
struct NameRTrim {
710
    static constexpr auto name = "rtrim";
711
};
712
struct NameTrimIn {
713
    static constexpr auto name = "trim_in";
714
};
715
struct NameLTrimIn {
716
    static constexpr auto name = "ltrim_in";
717
};
718
struct NameRTrimIn {
719
    static constexpr auto name = "rtrim_in";
720
};
721
template <bool is_ltrim, bool is_rtrim, bool trim_single>
722
struct TrimUtil {
723
    static Status vector(const ColumnString::Chars& str_data,
724
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
725
201
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
201
        const size_t offset_size = str_offsets.size();
727
201
        res_offsets.resize(offset_size);
728
201
        res_data.reserve(str_data.size());
729
450
        for (size_t i = 0; i < offset_size; ++i) {
730
249
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
249
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
249
            if constexpr (is_ltrim) {
734
132
                str_begin =
735
132
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
132
            }
737
249
            if constexpr (is_rtrim) {
738
192
                str_end =
739
192
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
192
            }
741
742
249
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
249
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
249
        }
746
201
        return Status::OK();
747
201
    }
_ZN5doris8TrimUtilILb1ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
42
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
42
        const size_t offset_size = str_offsets.size();
727
42
        res_offsets.resize(offset_size);
728
42
        res_data.reserve(str_data.size());
729
112
        for (size_t i = 0; i < offset_size; ++i) {
730
70
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
70
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
70
            if constexpr (is_ltrim) {
734
70
                str_begin =
735
70
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
70
            }
737
70
            if constexpr (is_rtrim) {
738
70
                str_end =
739
70
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
70
            }
741
742
70
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
70
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
70
        }
746
42
        return Status::OK();
747
42
    }
_ZN5doris8TrimUtilILb1ELb0ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
36
        const size_t offset_size = str_offsets.size();
727
36
        res_offsets.resize(offset_size);
728
36
        res_data.reserve(str_data.size());
729
82
        for (size_t i = 0; i < offset_size; ++i) {
730
46
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
46
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
46
            if constexpr (is_ltrim) {
734
46
                str_begin =
735
46
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
46
            }
737
            if constexpr (is_rtrim) {
738
                str_end =
739
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
            }
741
742
46
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
46
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
46
        }
746
36
        return Status::OK();
747
36
    }
_ZN5doris8TrimUtilILb0ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
62
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
62
        const size_t offset_size = str_offsets.size();
727
62
        res_offsets.resize(offset_size);
728
62
        res_data.reserve(str_data.size());
729
134
        for (size_t i = 0; i < offset_size; ++i) {
730
72
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
72
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
            if constexpr (is_ltrim) {
734
                str_begin =
735
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
            }
737
72
            if constexpr (is_rtrim) {
738
72
                str_end =
739
72
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
72
            }
741
742
72
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
72
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
72
        }
746
62
        return Status::OK();
747
62
    }
_ZN5doris8TrimUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
5
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
5
        const size_t offset_size = str_offsets.size();
727
5
        res_offsets.resize(offset_size);
728
5
        res_data.reserve(str_data.size());
729
10
        for (size_t i = 0; i < offset_size; ++i) {
730
5
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
5
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
5
            if constexpr (is_ltrim) {
734
5
                str_begin =
735
5
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
5
            }
737
5
            if constexpr (is_rtrim) {
738
5
                str_end =
739
5
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
5
            }
741
742
5
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
5
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
5
        }
746
5
        return Status::OK();
747
5
    }
_ZN5doris8TrimUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
11
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
11
        const size_t offset_size = str_offsets.size();
727
11
        res_offsets.resize(offset_size);
728
11
        res_data.reserve(str_data.size());
729
22
        for (size_t i = 0; i < offset_size; ++i) {
730
11
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
11
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
11
            if constexpr (is_ltrim) {
734
11
                str_begin =
735
11
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
11
            }
737
            if constexpr (is_rtrim) {
738
                str_end =
739
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
            }
741
742
11
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
11
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
11
        }
746
11
        return Status::OK();
747
11
    }
_ZN5doris8TrimUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
725
45
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
726
45
        const size_t offset_size = str_offsets.size();
727
45
        res_offsets.resize(offset_size);
728
45
        res_data.reserve(str_data.size());
729
90
        for (size_t i = 0; i < offset_size; ++i) {
730
45
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
731
45
            const auto* str_end = str_data.data() + str_offsets[i];
732
733
            if constexpr (is_ltrim) {
734
                str_begin =
735
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
736
            }
737
45
            if constexpr (is_rtrim) {
738
45
                str_end =
739
45
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
740
45
            }
741
742
45
            res_data.insert_assume_reserved(str_begin, str_end);
743
            // The length of the result of the trim function will never exceed the length of the input.
744
45
            res_offsets[i] = (ColumnString::Offset)res_data.size();
745
45
        }
746
45
        return Status::OK();
747
45
    }
748
};
749
template <bool is_ltrim, bool is_rtrim, bool trim_single>
750
struct TrimInUtil {
751
    static Status vector(const ColumnString::Chars& str_data,
752
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
753
121
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
754
121
        const size_t offset_size = str_offsets.size();
755
121
        res_offsets.resize(offset_size);
756
121
        res_data.reserve(str_data.size());
757
121
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
758
121
                         simd::VStringFunctions::is_ascii(StringRef(
759
76
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
760
761
121
        if (all_ascii) {
762
68
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
763
68
        } else {
764
53
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
765
53
        }
766
121
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
753
43
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
754
43
        const size_t offset_size = str_offsets.size();
755
43
        res_offsets.resize(offset_size);
756
43
        res_data.reserve(str_data.size());
757
43
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
758
43
                         simd::VStringFunctions::is_ascii(StringRef(
759
28
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
760
761
43
        if (all_ascii) {
762
24
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
763
24
        } else {
764
19
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
765
19
        }
766
43
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
753
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
754
36
        const size_t offset_size = str_offsets.size();
755
36
        res_offsets.resize(offset_size);
756
36
        res_data.reserve(str_data.size());
757
36
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
758
36
                         simd::VStringFunctions::is_ascii(StringRef(
759
21
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
760
761
36
        if (all_ascii) {
762
19
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
763
19
        } else {
764
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
765
17
        }
766
36
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
753
42
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
754
42
        const size_t offset_size = str_offsets.size();
755
42
        res_offsets.resize(offset_size);
756
42
        res_data.reserve(str_data.size());
757
42
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
758
42
                         simd::VStringFunctions::is_ascii(StringRef(
759
27
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
760
761
42
        if (all_ascii) {
762
25
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
763
25
        } else {
764
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
765
17
        }
766
42
    }
767
768
private:
769
    static Status impl_vectors_ascii(const ColumnString::Chars& str_data,
770
                                     const ColumnString::Offsets& str_offsets,
771
                                     const StringRef& remove_str, ColumnString::Chars& res_data,
772
68
                                     ColumnString::Offsets& res_offsets) {
773
68
        const size_t offset_size = str_offsets.size();
774
68
        std::bitset<128> char_lookup;
775
68
        const char* remove_begin = remove_str.data;
776
68
        const char* remove_end = remove_str.data + remove_str.size;
777
778
251
        while (remove_begin < remove_end) {
779
183
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
780
183
            remove_begin += 1;
781
183
        }
782
783
136
        for (size_t i = 0; i < offset_size; ++i) {
784
68
            const char* str_begin =
785
68
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
786
68
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
787
68
            const char* left_trim_pos = str_begin;
788
68
            const char* right_trim_pos = str_end;
789
790
68
            if constexpr (is_ltrim) {
791
127
                while (left_trim_pos < str_end) {
792
114
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
793
30
                        break;
794
30
                    }
795
84
                    ++left_trim_pos;
796
84
                }
797
43
            }
798
799
68
            if constexpr (is_rtrim) {
800
114
                while (right_trim_pos > left_trim_pos) {
801
100
                    --right_trim_pos;
802
100
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
803
35
                        ++right_trim_pos;
804
35
                        break;
805
35
                    }
806
100
                }
807
49
            }
808
809
68
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
810
            // The length of the result of the trim function will never exceed the length of the input.
811
68
            res_offsets[i] = (ColumnString::Offset)res_data.size();
812
68
        }
813
814
68
        return Status::OK();
815
68
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
772
24
                                     ColumnString::Offsets& res_offsets) {
773
24
        const size_t offset_size = str_offsets.size();
774
24
        std::bitset<128> char_lookup;
775
24
        const char* remove_begin = remove_str.data;
776
24
        const char* remove_end = remove_str.data + remove_str.size;
777
778
86
        while (remove_begin < remove_end) {
779
62
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
780
62
            remove_begin += 1;
781
62
        }
782
783
48
        for (size_t i = 0; i < offset_size; ++i) {
784
24
            const char* str_begin =
785
24
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
786
24
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
787
24
            const char* left_trim_pos = str_begin;
788
24
            const char* right_trim_pos = str_end;
789
790
24
            if constexpr (is_ltrim) {
791
57
                while (left_trim_pos < str_end) {
792
50
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
793
17
                        break;
794
17
                    }
795
33
                    ++left_trim_pos;
796
33
                }
797
24
            }
798
799
24
            if constexpr (is_rtrim) {
800
39
                while (right_trim_pos > left_trim_pos) {
801
32
                    --right_trim_pos;
802
32
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
803
17
                        ++right_trim_pos;
804
17
                        break;
805
17
                    }
806
32
                }
807
24
            }
808
809
24
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
810
            // The length of the result of the trim function will never exceed the length of the input.
811
24
            res_offsets[i] = (ColumnString::Offset)res_data.size();
812
24
        }
813
814
24
        return Status::OK();
815
24
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
772
19
                                     ColumnString::Offsets& res_offsets) {
773
19
        const size_t offset_size = str_offsets.size();
774
19
        std::bitset<128> char_lookup;
775
19
        const char* remove_begin = remove_str.data;
776
19
        const char* remove_end = remove_str.data + remove_str.size;
777
778
73
        while (remove_begin < remove_end) {
779
54
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
780
54
            remove_begin += 1;
781
54
        }
782
783
38
        for (size_t i = 0; i < offset_size; ++i) {
784
19
            const char* str_begin =
785
19
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
786
19
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
787
19
            const char* left_trim_pos = str_begin;
788
19
            const char* right_trim_pos = str_end;
789
790
19
            if constexpr (is_ltrim) {
791
70
                while (left_trim_pos < str_end) {
792
64
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
793
13
                        break;
794
13
                    }
795
51
                    ++left_trim_pos;
796
51
                }
797
19
            }
798
799
            if constexpr (is_rtrim) {
800
                while (right_trim_pos > left_trim_pos) {
801
                    --right_trim_pos;
802
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
803
                        ++right_trim_pos;
804
                        break;
805
                    }
806
                }
807
            }
808
809
19
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
810
            // The length of the result of the trim function will never exceed the length of the input.
811
19
            res_offsets[i] = (ColumnString::Offset)res_data.size();
812
19
        }
813
814
19
        return Status::OK();
815
19
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
772
25
                                     ColumnString::Offsets& res_offsets) {
773
25
        const size_t offset_size = str_offsets.size();
774
25
        std::bitset<128> char_lookup;
775
25
        const char* remove_begin = remove_str.data;
776
25
        const char* remove_end = remove_str.data + remove_str.size;
777
778
92
        while (remove_begin < remove_end) {
779
67
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
780
67
            remove_begin += 1;
781
67
        }
782
783
50
        for (size_t i = 0; i < offset_size; ++i) {
784
25
            const char* str_begin =
785
25
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
786
25
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
787
25
            const char* left_trim_pos = str_begin;
788
25
            const char* right_trim_pos = str_end;
789
790
            if constexpr (is_ltrim) {
791
                while (left_trim_pos < str_end) {
792
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
793
                        break;
794
                    }
795
                    ++left_trim_pos;
796
                }
797
            }
798
799
25
            if constexpr (is_rtrim) {
800
75
                while (right_trim_pos > left_trim_pos) {
801
68
                    --right_trim_pos;
802
68
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
803
18
                        ++right_trim_pos;
804
18
                        break;
805
18
                    }
806
68
                }
807
25
            }
808
809
25
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
810
            // The length of the result of the trim function will never exceed the length of the input.
811
25
            res_offsets[i] = (ColumnString::Offset)res_data.size();
812
25
        }
813
814
25
        return Status::OK();
815
25
    }
816
817
    static Status impl_vectors_utf8(const ColumnString::Chars& str_data,
818
                                    const ColumnString::Offsets& str_offsets,
819
                                    const StringRef& remove_str, ColumnString::Chars& res_data,
820
53
                                    ColumnString::Offsets& res_offsets) {
821
53
        const size_t offset_size = str_offsets.size();
822
53
        res_offsets.resize(offset_size);
823
53
        res_data.reserve(str_data.size());
824
825
53
        std::unordered_set<std::string_view> char_lookup;
826
53
        const char* remove_begin = remove_str.data;
827
53
        const char* remove_end = remove_str.data + remove_str.size;
828
829
240
        while (remove_begin < remove_end) {
830
187
            size_t byte_len, char_len;
831
187
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
832
187
                    remove_begin, remove_end, 1);
833
187
            char_lookup.insert(std::string_view(remove_begin, byte_len));
834
187
            remove_begin += byte_len;
835
187
        }
836
837
140
        for (size_t i = 0; i < offset_size; ++i) {
838
87
            const char* str_begin =
839
87
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
840
87
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
841
87
            const char* left_trim_pos = str_begin;
842
87
            const char* right_trim_pos = str_end;
843
844
87
            if constexpr (is_ltrim) {
845
81
                while (left_trim_pos < str_end) {
846
73
                    size_t byte_len, char_len;
847
73
                    std::tie(byte_len, char_len) =
848
73
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
849
73
                                                                                   str_end, 1);
850
73
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
851
73
                        char_lookup.end()) {
852
52
                        break;
853
52
                    }
854
21
                    left_trim_pos += byte_len;
855
21
                }
856
60
            }
857
858
87
            if constexpr (is_rtrim) {
859
88
                while (right_trim_pos > left_trim_pos) {
860
80
                    const char* prev_char_pos = right_trim_pos;
861
156
                    do {
862
156
                        --prev_char_pos;
863
156
                    } while ((*prev_char_pos & 0xC0) == 0x80);
864
80
                    size_t byte_len = right_trim_pos - prev_char_pos;
865
80
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
866
80
                        char_lookup.end()) {
867
52
                        break;
868
52
                    }
869
28
                    right_trim_pos = prev_char_pos;
870
28
                }
871
60
            }
872
873
87
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
874
            // The length of the result of the trim function will never exceed the length of the input.
875
87
            res_offsets[i] = (ColumnString::Offset)res_data.size();
876
87
        }
877
53
        return Status::OK();
878
53
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
820
19
                                    ColumnString::Offsets& res_offsets) {
821
19
        const size_t offset_size = str_offsets.size();
822
19
        res_offsets.resize(offset_size);
823
19
        res_data.reserve(str_data.size());
824
825
19
        std::unordered_set<std::string_view> char_lookup;
826
19
        const char* remove_begin = remove_str.data;
827
19
        const char* remove_end = remove_str.data + remove_str.size;
828
829
84
        while (remove_begin < remove_end) {
830
65
            size_t byte_len, char_len;
831
65
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
832
65
                    remove_begin, remove_end, 1);
833
65
            char_lookup.insert(std::string_view(remove_begin, byte_len));
834
65
            remove_begin += byte_len;
835
65
        }
836
837
52
        for (size_t i = 0; i < offset_size; ++i) {
838
33
            const char* str_begin =
839
33
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
840
33
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
841
33
            const char* left_trim_pos = str_begin;
842
33
            const char* right_trim_pos = str_end;
843
844
33
            if constexpr (is_ltrim) {
845
45
                while (left_trim_pos < str_end) {
846
41
                    size_t byte_len, char_len;
847
41
                    std::tie(byte_len, char_len) =
848
41
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
849
41
                                                                                   str_end, 1);
850
41
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
851
41
                        char_lookup.end()) {
852
29
                        break;
853
29
                    }
854
12
                    left_trim_pos += byte_len;
855
12
                }
856
33
            }
857
858
33
            if constexpr (is_rtrim) {
859
48
                while (right_trim_pos > left_trim_pos) {
860
44
                    const char* prev_char_pos = right_trim_pos;
861
90
                    do {
862
90
                        --prev_char_pos;
863
90
                    } while ((*prev_char_pos & 0xC0) == 0x80);
864
44
                    size_t byte_len = right_trim_pos - prev_char_pos;
865
44
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
866
44
                        char_lookup.end()) {
867
29
                        break;
868
29
                    }
869
15
                    right_trim_pos = prev_char_pos;
870
15
                }
871
33
            }
872
873
33
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
874
            // The length of the result of the trim function will never exceed the length of the input.
875
33
            res_offsets[i] = (ColumnString::Offset)res_data.size();
876
33
        }
877
19
        return Status::OK();
878
19
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
820
17
                                    ColumnString::Offsets& res_offsets) {
821
17
        const size_t offset_size = str_offsets.size();
822
17
        res_offsets.resize(offset_size);
823
17
        res_data.reserve(str_data.size());
824
825
17
        std::unordered_set<std::string_view> char_lookup;
826
17
        const char* remove_begin = remove_str.data;
827
17
        const char* remove_end = remove_str.data + remove_str.size;
828
829
78
        while (remove_begin < remove_end) {
830
61
            size_t byte_len, char_len;
831
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
832
61
                    remove_begin, remove_end, 1);
833
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
834
61
            remove_begin += byte_len;
835
61
        }
836
837
44
        for (size_t i = 0; i < offset_size; ++i) {
838
27
            const char* str_begin =
839
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
840
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
841
27
            const char* left_trim_pos = str_begin;
842
27
            const char* right_trim_pos = str_end;
843
844
27
            if constexpr (is_ltrim) {
845
36
                while (left_trim_pos < str_end) {
846
32
                    size_t byte_len, char_len;
847
32
                    std::tie(byte_len, char_len) =
848
32
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
849
32
                                                                                   str_end, 1);
850
32
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
851
32
                        char_lookup.end()) {
852
23
                        break;
853
23
                    }
854
9
                    left_trim_pos += byte_len;
855
9
                }
856
27
            }
857
858
            if constexpr (is_rtrim) {
859
                while (right_trim_pos > left_trim_pos) {
860
                    const char* prev_char_pos = right_trim_pos;
861
                    do {
862
                        --prev_char_pos;
863
                    } while ((*prev_char_pos & 0xC0) == 0x80);
864
                    size_t byte_len = right_trim_pos - prev_char_pos;
865
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
866
                        char_lookup.end()) {
867
                        break;
868
                    }
869
                    right_trim_pos = prev_char_pos;
870
                }
871
            }
872
873
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
874
            // The length of the result of the trim function will never exceed the length of the input.
875
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
876
27
        }
877
17
        return Status::OK();
878
17
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
820
17
                                    ColumnString::Offsets& res_offsets) {
821
17
        const size_t offset_size = str_offsets.size();
822
17
        res_offsets.resize(offset_size);
823
17
        res_data.reserve(str_data.size());
824
825
17
        std::unordered_set<std::string_view> char_lookup;
826
17
        const char* remove_begin = remove_str.data;
827
17
        const char* remove_end = remove_str.data + remove_str.size;
828
829
78
        while (remove_begin < remove_end) {
830
61
            size_t byte_len, char_len;
831
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
832
61
                    remove_begin, remove_end, 1);
833
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
834
61
            remove_begin += byte_len;
835
61
        }
836
837
44
        for (size_t i = 0; i < offset_size; ++i) {
838
27
            const char* str_begin =
839
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
840
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
841
27
            const char* left_trim_pos = str_begin;
842
27
            const char* right_trim_pos = str_end;
843
844
            if constexpr (is_ltrim) {
845
                while (left_trim_pos < str_end) {
846
                    size_t byte_len, char_len;
847
                    std::tie(byte_len, char_len) =
848
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
849
                                                                                   str_end, 1);
850
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
851
                        char_lookup.end()) {
852
                        break;
853
                    }
854
                    left_trim_pos += byte_len;
855
                }
856
            }
857
858
27
            if constexpr (is_rtrim) {
859
40
                while (right_trim_pos > left_trim_pos) {
860
36
                    const char* prev_char_pos = right_trim_pos;
861
66
                    do {
862
66
                        --prev_char_pos;
863
66
                    } while ((*prev_char_pos & 0xC0) == 0x80);
864
36
                    size_t byte_len = right_trim_pos - prev_char_pos;
865
36
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
866
36
                        char_lookup.end()) {
867
23
                        break;
868
23
                    }
869
13
                    right_trim_pos = prev_char_pos;
870
13
                }
871
27
            }
872
873
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
874
            // The length of the result of the trim function will never exceed the length of the input.
875
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
876
27
        }
877
17
        return Status::OK();
878
17
    }
879
};
880
// This is an implementation of a parameter for the Trim function.
881
template <bool is_ltrim, bool is_rtrim, typename Name>
882
struct Trim1Impl {
883
    static constexpr auto name = Name::name;
884
885
145
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
885
41
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
885
31
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
885
37
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
885
9
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
885
13
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
885
14
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
886
887
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
888
91
                          uint32_t result, size_t input_rows_count) {
889
91
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
91
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
91
            auto col_res = ColumnString::create();
892
91
            char blank[] = " ";
893
91
            const StringRef remove_str(blank, 1);
894
91
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
91
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
91
                    col_res->get_offsets())));
897
91
            block.replace_by_position(result, std::move(col_res));
898
91
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
91
        return Status::OK();
904
91
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
32
                          uint32_t result, size_t input_rows_count) {
889
32
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
32
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
32
            auto col_res = ColumnString::create();
892
32
            char blank[] = " ";
893
32
            const StringRef remove_str(blank, 1);
894
32
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
32
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
32
                    col_res->get_offsets())));
897
32
            block.replace_by_position(result, std::move(col_res));
898
32
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
32
        return Status::OK();
904
32
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
21
                          uint32_t result, size_t input_rows_count) {
889
21
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
21
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
21
            auto col_res = ColumnString::create();
892
21
            char blank[] = " ";
893
21
            const StringRef remove_str(blank, 1);
894
21
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
21
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
21
                    col_res->get_offsets())));
897
21
            block.replace_by_position(result, std::move(col_res));
898
21
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
21
        return Status::OK();
904
21
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
26
                          uint32_t result, size_t input_rows_count) {
889
26
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
26
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
26
            auto col_res = ColumnString::create();
892
26
            char blank[] = " ";
893
26
            const StringRef remove_str(blank, 1);
894
26
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
26
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
26
                    col_res->get_offsets())));
897
26
            block.replace_by_position(result, std::move(col_res));
898
26
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
26
        return Status::OK();
904
26
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
1
                          uint32_t result, size_t input_rows_count) {
889
1
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
1
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
1
            auto col_res = ColumnString::create();
892
1
            char blank[] = " ";
893
1
            const StringRef remove_str(blank, 1);
894
1
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
1
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
1
                    col_res->get_offsets())));
897
1
            block.replace_by_position(result, std::move(col_res));
898
1
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
1
        return Status::OK();
904
1
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
5
                          uint32_t result, size_t input_rows_count) {
889
5
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
5
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
5
            auto col_res = ColumnString::create();
892
5
            char blank[] = " ";
893
5
            const StringRef remove_str(blank, 1);
894
5
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
5
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
5
                    col_res->get_offsets())));
897
5
            block.replace_by_position(result, std::move(col_res));
898
5
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
5
        return Status::OK();
904
5
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
888
6
                          uint32_t result, size_t input_rows_count) {
889
6
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
890
6
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
6
            auto col_res = ColumnString::create();
892
6
            char blank[] = " ";
893
6
            const StringRef remove_str(blank, 1);
894
6
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
895
6
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
896
6
                    col_res->get_offsets())));
897
6
            block.replace_by_position(result, std::move(col_res));
898
6
        } else {
899
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
900
0
                                        block.get_by_position(arguments[0]).column->get_name(),
901
0
                                        name);
902
0
        }
903
6
        return Status::OK();
904
6
    }
905
};
906
907
// This is an implementation of two parameters for the Trim function.
908
template <bool is_ltrim, bool is_rtrim, typename Name>
909
struct Trim2Impl {
910
    static constexpr auto name = Name::name;
911
912
213
    static DataTypes get_variadic_argument_types() {
913
213
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
213
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
912
15
    static DataTypes get_variadic_argument_types() {
913
15
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
15
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
912
25
    static DataTypes get_variadic_argument_types() {
913
25
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
25
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
912
80
    static DataTypes get_variadic_argument_types() {
913
80
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
80
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
912
27
    static DataTypes get_variadic_argument_types() {
913
27
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
27
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
912
29
    static DataTypes get_variadic_argument_types() {
913
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
912
37
    static DataTypes get_variadic_argument_types() {
913
37
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
914
37
    }
915
916
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
917
231
                          uint32_t result, size_t input_rows_count) {
918
231
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
231
        const auto& rcol =
920
231
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
231
                        ->get_data_column_ptr();
922
231
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
231
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
231
                auto col_res = ColumnString::create();
925
231
                const auto* remove_str_raw = col_right->get_chars().data();
926
231
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
231
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
231
                if (remove_str.size == 1) {
930
49
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
49
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
49
                            col_res->get_offsets())));
933
182
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
121
                                  std::is_same<Name, NameRTrimIn>::value) {
937
121
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
121
                                col->get_chars(), col->get_offsets(), remove_str,
939
121
                                col_res->get_chars(), col_res->get_offsets())));
940
121
                    } else {
941
61
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
61
                                col->get_chars(), col->get_offsets(), remove_str,
943
61
                                col_res->get_chars(), col_res->get_offsets())));
944
61
                    }
945
182
                }
946
231
                block.replace_by_position(result, std::move(col_res));
947
231
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
231
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
231
        return Status::OK();
959
231
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
7
                          uint32_t result, size_t input_rows_count) {
918
7
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
7
        const auto& rcol =
920
7
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
7
                        ->get_data_column_ptr();
922
7
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
7
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
7
                auto col_res = ColumnString::create();
925
7
                const auto* remove_str_raw = col_right->get_chars().data();
926
7
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
7
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
7
                if (remove_str.size == 1) {
930
2
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
2
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
2
                            col_res->get_offsets())));
933
5
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
                                  std::is_same<Name, NameRTrimIn>::value) {
937
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
                                col->get_chars(), col->get_offsets(), remove_str,
939
                                col_res->get_chars(), col_res->get_offsets())));
940
5
                    } else {
941
5
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
5
                                col->get_chars(), col->get_offsets(), remove_str,
943
5
                                col_res->get_chars(), col_res->get_offsets())));
944
5
                    }
945
5
                }
946
7
                block.replace_by_position(result, std::move(col_res));
947
7
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
7
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
7
        return Status::OK();
959
7
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
16
                          uint32_t result, size_t input_rows_count) {
918
16
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
16
        const auto& rcol =
920
16
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
16
                        ->get_data_column_ptr();
922
16
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
16
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
16
                auto col_res = ColumnString::create();
925
16
                const auto* remove_str_raw = col_right->get_chars().data();
926
16
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
16
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
16
                if (remove_str.size == 1) {
930
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
5
                            col_res->get_offsets())));
933
11
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
                                  std::is_same<Name, NameRTrimIn>::value) {
937
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
                                col->get_chars(), col->get_offsets(), remove_str,
939
                                col_res->get_chars(), col_res->get_offsets())));
940
11
                    } else {
941
11
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
11
                                col->get_chars(), col->get_offsets(), remove_str,
943
11
                                col_res->get_chars(), col_res->get_offsets())));
944
11
                    }
945
11
                }
946
16
                block.replace_by_position(result, std::move(col_res));
947
16
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
16
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
16
        return Status::OK();
959
16
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
69
                          uint32_t result, size_t input_rows_count) {
918
69
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
69
        const auto& rcol =
920
69
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
69
                        ->get_data_column_ptr();
922
69
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
69
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
69
                auto col_res = ColumnString::create();
925
69
                const auto* remove_str_raw = col_right->get_chars().data();
926
69
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
69
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
69
                if (remove_str.size == 1) {
930
24
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
24
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
24
                            col_res->get_offsets())));
933
45
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
                                  std::is_same<Name, NameRTrimIn>::value) {
937
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
                                col->get_chars(), col->get_offsets(), remove_str,
939
                                col_res->get_chars(), col_res->get_offsets())));
940
45
                    } else {
941
45
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
45
                                col->get_chars(), col->get_offsets(), remove_str,
943
45
                                col_res->get_chars(), col_res->get_offsets())));
944
45
                    }
945
45
                }
946
69
                block.replace_by_position(result, std::move(col_res));
947
69
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
69
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
69
        return Status::OK();
959
69
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
50
                          uint32_t result, size_t input_rows_count) {
918
50
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
50
        const auto& rcol =
920
50
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
50
                        ->get_data_column_ptr();
922
50
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
50
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
50
                auto col_res = ColumnString::create();
925
50
                const auto* remove_str_raw = col_right->get_chars().data();
926
50
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
50
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
50
                if (remove_str.size == 1) {
930
7
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
7
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
7
                            col_res->get_offsets())));
933
43
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
43
                                  std::is_same<Name, NameRTrimIn>::value) {
937
43
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
43
                                col->get_chars(), col->get_offsets(), remove_str,
939
43
                                col_res->get_chars(), col_res->get_offsets())));
940
                    } else {
941
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
                                col->get_chars(), col->get_offsets(), remove_str,
943
                                col_res->get_chars(), col_res->get_offsets())));
944
                    }
945
43
                }
946
50
                block.replace_by_position(result, std::move(col_res));
947
50
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
50
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
50
        return Status::OK();
959
50
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
41
                          uint32_t result, size_t input_rows_count) {
918
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
41
        const auto& rcol =
920
41
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
41
                        ->get_data_column_ptr();
922
41
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
41
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
41
                auto col_res = ColumnString::create();
925
41
                const auto* remove_str_raw = col_right->get_chars().data();
926
41
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
41
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
41
                if (remove_str.size == 1) {
930
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
5
                            col_res->get_offsets())));
933
36
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
36
                                  std::is_same<Name, NameRTrimIn>::value) {
937
36
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
36
                                col->get_chars(), col->get_offsets(), remove_str,
939
36
                                col_res->get_chars(), col_res->get_offsets())));
940
                    } else {
941
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
                                col->get_chars(), col->get_offsets(), remove_str,
943
                                col_res->get_chars(), col_res->get_offsets())));
944
                    }
945
36
                }
946
41
                block.replace_by_position(result, std::move(col_res));
947
41
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
41
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
41
        return Status::OK();
959
41
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
917
48
                          uint32_t result, size_t input_rows_count) {
918
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
919
48
        const auto& rcol =
920
48
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
921
48
                        ->get_data_column_ptr();
922
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
923
48
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
924
48
                auto col_res = ColumnString::create();
925
48
                const auto* remove_str_raw = col_right->get_chars().data();
926
48
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
927
48
                const StringRef remove_str(remove_str_raw, remove_str_size);
928
929
48
                if (remove_str.size == 1) {
930
6
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
931
6
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
932
6
                            col_res->get_offsets())));
933
42
                } else {
934
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
935
                                  std::is_same<Name, NameLTrimIn>::value ||
936
42
                                  std::is_same<Name, NameRTrimIn>::value) {
937
42
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
938
42
                                col->get_chars(), col->get_offsets(), remove_str,
939
42
                                col_res->get_chars(), col_res->get_offsets())));
940
                    } else {
941
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
942
                                col->get_chars(), col->get_offsets(), remove_str,
943
                                col_res->get_chars(), col_res->get_offsets())));
944
                    }
945
42
                }
946
48
                block.replace_by_position(result, std::move(col_res));
947
48
            } else {
948
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
949
0
                                            block.get_by_position(arguments[1]).column->get_name(),
950
0
                                            name);
951
0
            }
952
953
48
        } else {
954
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
955
0
                                        block.get_by_position(arguments[0]).column->get_name(),
956
0
                                        name);
957
0
        }
958
48
        return Status::OK();
959
48
    }
960
};
961
962
template <typename impl>
963
class FunctionTrim : public IFunction {
964
public:
965
    static constexpr auto name = impl::name;
966
370
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
966
42
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
966
32
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
966
38
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
966
16
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
966
26
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
966
81
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
966
10
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
966
14
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
966
15
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
966
28
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
966
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
966
38
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
967
12
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
967
1
    String get_name() const override { return impl::name; }
968
969
262
    size_t get_number_of_arguments() const override {
970
262
        return get_variadic_argument_types_impl().size();
971
262
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
33
    size_t get_number_of_arguments() const override {
970
33
        return get_variadic_argument_types_impl().size();
971
33
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
23
    size_t get_number_of_arguments() const override {
970
23
        return get_variadic_argument_types_impl().size();
971
23
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
29
    size_t get_number_of_arguments() const override {
970
29
        return get_variadic_argument_types_impl().size();
971
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
7
    size_t get_number_of_arguments() const override {
970
7
        return get_variadic_argument_types_impl().size();
971
7
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
17
    size_t get_number_of_arguments() const override {
970
17
        return get_variadic_argument_types_impl().size();
971
17
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
969
72
    size_t get_number_of_arguments() const override {
970
72
        return get_variadic_argument_types_impl().size();
971
72
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
1
    size_t get_number_of_arguments() const override {
970
1
        return get_variadic_argument_types_impl().size();
971
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
5
    size_t get_number_of_arguments() const override {
970
5
        return get_variadic_argument_types_impl().size();
971
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
6
    size_t get_number_of_arguments() const override {
970
6
        return get_variadic_argument_types_impl().size();
971
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
19
    size_t get_number_of_arguments() const override {
970
19
        return get_variadic_argument_types_impl().size();
971
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
21
    size_t get_number_of_arguments() const override {
970
21
        return get_variadic_argument_types_impl().size();
971
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
969
29
    size_t get_number_of_arguments() const override {
970
29
        return get_variadic_argument_types_impl().size();
971
29
    }
972
973
262
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
262
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
262
        return arguments[0];
980
262
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
33
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
33
        return arguments[0];
980
33
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
23
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
23
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
23
        return arguments[0];
980
23
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
29
        return arguments[0];
980
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
7
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
7
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
7
        return arguments[0];
980
7
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
17
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
17
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
17
        return arguments[0];
980
17
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
72
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
72
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
72
        return arguments[0];
980
72
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
1
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
1
        return arguments[0];
980
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
5
        return arguments[0];
980
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
6
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
6
        return arguments[0];
980
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
19
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
19
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
19
        return arguments[0];
980
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
21
        return arguments[0];
980
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
973
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
974
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
975
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
976
0
                                   "Illegal type {} of argument of function {}",
977
0
                                   arguments[0]->get_name(), get_name());
978
0
        }
979
29
        return arguments[0];
980
29
    }
981
    // The second parameter of "trim" is a constant.
982
420
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
61
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
34
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
40
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
7
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
17
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
72
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
1
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
5
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
6
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
67
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
51
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
982
59
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
983
984
358
    // Forwards to `impl::get_variadic_argument_types()` and returns its result
    // unchanged; the argument type list is owned entirely by `impl`.
    DataTypes get_variadic_argument_types_impl() const override {
        DataTypes variadic_types = impl::get_variadic_argument_types();
        return variadic_types;
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
41
    DataTypes get_variadic_argument_types_impl() const override {
985
41
        return impl::get_variadic_argument_types();
986
41
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
31
    DataTypes get_variadic_argument_types_impl() const override {
985
31
        return impl::get_variadic_argument_types();
986
31
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
37
    DataTypes get_variadic_argument_types_impl() const override {
985
37
        return impl::get_variadic_argument_types();
986
37
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
15
    DataTypes get_variadic_argument_types_impl() const override {
985
15
        return impl::get_variadic_argument_types();
986
15
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
25
    DataTypes get_variadic_argument_types_impl() const override {
985
25
        return impl::get_variadic_argument_types();
986
25
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
80
    DataTypes get_variadic_argument_types_impl() const override {
985
80
        return impl::get_variadic_argument_types();
986
80
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
9
    DataTypes get_variadic_argument_types_impl() const override {
985
9
        return impl::get_variadic_argument_types();
986
9
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
13
    DataTypes get_variadic_argument_types_impl() const override {
985
13
        return impl::get_variadic_argument_types();
986
13
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
14
    DataTypes get_variadic_argument_types_impl() const override {
985
14
        return impl::get_variadic_argument_types();
986
14
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
27
    DataTypes get_variadic_argument_types_impl() const override {
985
27
        return impl::get_variadic_argument_types();
986
27
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
29
    DataTypes get_variadic_argument_types_impl() const override {
985
29
        return impl::get_variadic_argument_types();
986
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
984
37
    DataTypes get_variadic_argument_types_impl() const override {
985
37
        return impl::get_variadic_argument_types();
986
37
    }
987
988
    // Delegates evaluation to `impl::execute`, passing every argument through
    // untouched and returning the Status it produces.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        Status status = impl::execute(context, block, arguments, result, input_rows_count);
        return status;
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
32
                        uint32_t result, size_t input_rows_count) const override {
990
32
        return impl::execute(context, block, arguments, result, input_rows_count);
991
32
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
21
                        uint32_t result, size_t input_rows_count) const override {
990
21
        return impl::execute(context, block, arguments, result, input_rows_count);
991
21
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
26
                        uint32_t result, size_t input_rows_count) const override {
990
26
        return impl::execute(context, block, arguments, result, input_rows_count);
991
26
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
7
                        uint32_t result, size_t input_rows_count) const override {
990
7
        return impl::execute(context, block, arguments, result, input_rows_count);
991
7
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
16
                        uint32_t result, size_t input_rows_count) const override {
990
16
        return impl::execute(context, block, arguments, result, input_rows_count);
991
16
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
69
                        uint32_t result, size_t input_rows_count) const override {
990
69
        return impl::execute(context, block, arguments, result, input_rows_count);
991
69
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
1
                        uint32_t result, size_t input_rows_count) const override {
990
1
        return impl::execute(context, block, arguments, result, input_rows_count);
991
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
5
                        uint32_t result, size_t input_rows_count) const override {
990
5
        return impl::execute(context, block, arguments, result, input_rows_count);
991
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
6
                        uint32_t result, size_t input_rows_count) const override {
990
6
        return impl::execute(context, block, arguments, result, input_rows_count);
991
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
50
                        uint32_t result, size_t input_rows_count) const override {
990
50
        return impl::execute(context, block, arguments, result, input_rows_count);
991
50
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
41
                        uint32_t result, size_t input_rows_count) const override {
990
41
        return impl::execute(context, block, arguments, result, input_rows_count);
991
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
989
48
                        uint32_t result, size_t input_rows_count) const override {
990
48
        return impl::execute(context, block, arguments, result, input_rows_count);
991
48
    }
992
};
993
994
// Name tag carrying the function name "unhex".
struct UnHexImplEmpty {
    static constexpr const char* name = "unhex";
};
997
998
// Name tag carrying the function name "unhex_null".
struct UnHexImplNull {
    static constexpr const char* name = "unhex_null";
};
1001
1002
template <typename Name>
1003
struct UnHexImpl {
1004
    static constexpr auto name = Name::name;
1005
    using ReturnType = DataTypeString;
1006
    using ColumnType = ColumnString;
1007
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1008
1009
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1010
120
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1011
120
        auto rows_count = offsets.size();
1012
120
        dst_offsets.resize(rows_count);
1013
1014
120
        int64_t total_size = 0;
1015
254
        for (size_t i = 0; i < rows_count; i++) {
1016
134
            size_t len = offsets[i] - offsets[i - 1];
1017
134
            total_size += len / 2;
1018
134
        }
1019
120
        ColumnString::check_chars_length(total_size, rows_count);
1020
120
        dst_data.resize(total_size);
1021
120
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1022
120
        size_t offset = 0;
1023
1024
254
        for (int i = 0; i < rows_count; ++i) {
1025
134
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1026
134
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1027
1028
134
            if (UNLIKELY(srclen == 0)) {
1029
12
                dst_offsets[i] = cast_set<uint32_t>(offset);
1030
12
                continue;
1031
12
            }
1032
1033
122
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1034
1035
122
            offset += outlen;
1036
122
            dst_offsets[i] = cast_set<uint32_t>(offset);
1037
122
        }
1038
120
        dst_data.pop_back(total_size - offset);
1039
120
        return Status::OK();
1040
120
    }
1041
1042
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1043
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1044
33
                         ColumnUInt8::Container* null_map_data) {
1045
33
        auto rows_count = offsets.size();
1046
33
        dst_offsets.resize(rows_count);
1047
1048
33
        int64_t total_size = 0;
1049
84
        for (size_t i = 0; i < rows_count; i++) {
1050
51
            size_t len = offsets[i] - offsets[i - 1];
1051
51
            total_size += len / 2;
1052
51
        }
1053
33
        ColumnString::check_chars_length(total_size, rows_count);
1054
33
        dst_data.resize(total_size);
1055
33
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1056
33
        size_t offset = 0;
1057
1058
84
        for (int i = 0; i < rows_count; ++i) {
1059
51
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1060
51
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1061
1062
51
            if (UNLIKELY(srclen == 0)) {
1063
7
                (*null_map_data)[i] = 1;
1064
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1065
7
                continue;
1066
7
            }
1067
1068
44
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1069
1070
44
            if (outlen == 0) {
1071
13
                (*null_map_data)[i] = 1;
1072
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1073
13
                continue;
1074
13
            }
1075
1076
31
            offset += outlen;
1077
31
            dst_offsets[i] = cast_set<uint32_t>(offset);
1078
31
        }
1079
33
        dst_data.pop_back(total_size - offset);
1080
33
        return Status::OK();
1081
33
    }
1082
};
1083
1084
// Name tag carrying the function name "space".
struct NameStringSpace {
    static constexpr const char* name = "space";
};
1087
1088
struct StringSpace {
1089
    using ReturnType = DataTypeString;
1090
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_INT;
1091
    using Type = Int32;
1092
    using ReturnColumnType = ColumnString;
1093
1094
    static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data,
1095
8
                         ColumnString::Offsets& res_offsets) {
1096
8
        res_offsets.resize(data.size());
1097
8
        size_t input_size = res_offsets.size();
1098
8
        int64_t total_size = 0;
1099
16
        for (size_t i = 0; i < input_size; ++i) {
1100
8
            if (data[i] > 0) {
1101
4
                total_size += data[i];
1102
4
            }
1103
8
        }
1104
8
        ColumnString::check_chars_length(total_size, input_size);
1105
8
        res_data.reserve(total_size);
1106
1107
16
        for (size_t i = 0; i < input_size; ++i) {
1108
8
            if (data[i] > 0) [[likely]] {
1109
4
                res_data.resize_fill(res_data.size() + data[i], ' ');
1110
4
                cast_set(res_offsets[i], res_data.size());
1111
4
            } else {
1112
4
                StringOP::push_empty_string(i, res_data, res_offsets);
1113
4
            }
1114
8
        }
1115
8
        return Status::OK();
1116
8
    }
1117
};
1118
1119
struct ToBase64Impl {
1120
    static constexpr auto name = "to_base64";
1121
    using ReturnType = DataTypeString;
1122
    using ColumnType = ColumnString;
1123
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1124
1125
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1126
154
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1127
154
        auto rows_count = offsets.size();
1128
154
        dst_offsets.resize(rows_count);
1129
1130
154
        size_t total_size = 0;
1131
319
        for (size_t i = 0; i < rows_count; i++) {
1132
165
            size_t len = offsets[i] - offsets[i - 1];
1133
165
            total_size += 4 * ((len + 2) / 3);
1134
165
        }
1135
154
        ColumnString::check_chars_length(total_size, rows_count);
1136
154
        dst_data.resize(total_size);
1137
154
        auto* dst_data_ptr = dst_data.data();
1138
154
        size_t offset = 0;
1139
1140
319
        for (int i = 0; i < rows_count; ++i) {
1141
165
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1142
165
            size_t srclen = offsets[i] - offsets[i - 1];
1143
1144
165
            if (UNLIKELY(srclen == 0)) {
1145
10
                dst_offsets[i] = cast_set<uint32_t>(offset);
1146
10
                continue;
1147
10
            }
1148
1149
155
            auto outlen = doris::base64_encode((const unsigned char*)source, srclen,
1150
155
                                               (unsigned char*)(dst_data_ptr + offset));
1151
1152
155
            offset += outlen;
1153
155
            dst_offsets[i] = cast_set<uint32_t>(offset);
1154
155
        }
1155
154
        dst_data.pop_back(total_size - offset);
1156
154
        return Status::OK();
1157
154
    }
1158
};
1159
1160
struct FromBase64Impl {
1161
    static constexpr auto name = "from_base64";
1162
    using ReturnType = DataTypeString;
1163
    using ColumnType = ColumnString;
1164
1165
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1166
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1167
149
                         NullMap& null_map) {
1168
149
        auto rows_count = offsets.size();
1169
149
        dst_offsets.resize(rows_count);
1170
1171
149
        size_t total_size = 0;
1172
310
        for (size_t i = 0; i < rows_count; i++) {
1173
161
            auto len = offsets[i] - offsets[i - 1];
1174
161
            total_size += len / 4 * 3;
1175
161
        }
1176
149
        ColumnString::check_chars_length(total_size, rows_count);
1177
149
        dst_data.resize(total_size);
1178
149
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1179
149
        size_t offset = 0;
1180
1181
310
        for (int i = 0; i < rows_count; ++i) {
1182
161
            if (UNLIKELY(null_map[i])) {
1183
0
                null_map[i] = 1;
1184
0
                dst_offsets[i] = cast_set<uint32_t>(offset);
1185
0
                continue;
1186
0
            }
1187
1188
161
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1189
161
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1190
1191
161
            if (UNLIKELY(srclen == 0)) {
1192
9
                dst_offsets[i] = cast_set<uint32_t>(offset);
1193
9
                continue;
1194
9
            }
1195
1196
152
            if (UNLIKELY(srclen % 4 != 0)) {
1197
20
                null_map[i] = 1;
1198
20
                dst_offsets[i] = cast_set<uint32_t>(offset);
1199
20
                continue;
1200
20
            }
1201
1202
132
            auto outlen = base64_decode(source, srclen, dst_data_ptr + offset);
1203
1204
132
            if (outlen < 0) {
1205
4
                null_map[i] = 1;
1206
4
                dst_offsets[i] = cast_set<uint32_t>(offset);
1207
128
            } else {
1208
128
                offset += outlen;
1209
128
                dst_offsets[i] = cast_set<uint32_t>(offset);
1210
128
            }
1211
132
        }
1212
149
        dst_data.pop_back(total_size - offset);
1213
149
        return Status::OK();
1214
149
    }
1215
};
1216
1217
struct StringAppendTrailingCharIfAbsent {
1218
    static constexpr auto name = "append_trailing_char_if_absent";
1219
    using Chars = ColumnString::Chars;
1220
    using Offsets = ColumnString::Offsets;
1221
    using ReturnType = DataTypeString;
1222
    using ColumnType = ColumnString;
1223
1224
48
    static bool str_end_with(const StringRef& str, const StringRef& end) {
1225
48
        if (str.size < end.size) {
1226
11
            return false;
1227
11
        }
1228
        // The end_with method of StringRef needs to ensure that the size of end is less than or equal to the size of str.
1229
37
        return str.end_with(end);
1230
48
    }
1231
1232
    static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1233
                              const Chars& rdata, const Offsets& roffsets, Chars& res_data,
1234
40
                              Offsets& res_offsets, NullMap& null_map_data) {
1235
40
        DCHECK_EQ(loffsets.size(), roffsets.size());
1236
40
        size_t input_rows_count = loffsets.size();
1237
40
        res_offsets.resize(input_rows_count);
1238
40
        fmt::memory_buffer buffer;
1239
1240
92
        for (size_t i = 0; i < input_rows_count; ++i) {
1241
52
            buffer.clear();
1242
1243
52
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1244
52
                                       loffsets[i] - loffsets[i - 1]);
1245
52
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1246
52
                                       roffsets[i] - roffsets[i - 1]);
1247
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1248
52
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1249
52
                    rstr.begin(), rstr.end(), 2);
1250
1251
52
            if (char_len != 1) {
1252
16
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1253
16
                continue;
1254
16
            }
1255
36
            if (str_end_with(lstr, rstr)) {
1256
9
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1257
9
                continue;
1258
9
            }
1259
1260
27
            buffer.append(lstr.begin(), lstr.end());
1261
27
            buffer.append(rstr.begin(), rstr.end());
1262
27
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1263
27
                                        res_offsets);
1264
27
        }
1265
40
    }
1266
    static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1267
                              const StringRef& rstr, Chars& res_data, Offsets& res_offsets,
1268
8
                              NullMap& null_map_data) {
1269
8
        size_t input_rows_count = loffsets.size();
1270
8
        res_offsets.resize(input_rows_count);
1271
8
        fmt::memory_buffer buffer;
1272
        // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1273
8
        auto [byte_len, char_len] =
1274
8
                simd::VStringFunctions::iterate_utf8_with_limit_length(rstr.begin(), rstr.end(), 2);
1275
8
        if (char_len != 1) {
1276
4
            for (size_t i = 0; i < input_rows_count; ++i) {
1277
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1278
2
            }
1279
2
            return;
1280
2
        }
1281
1282
12
        for (size_t i = 0; i < input_rows_count; ++i) {
1283
6
            buffer.clear();
1284
6
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1285
6
                                       loffsets[i] - loffsets[i - 1]);
1286
1287
6
            if (str_end_with(lstr, rstr)) {
1288
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1289
2
                continue;
1290
2
            }
1291
1292
4
            buffer.append(lstr.begin(), lstr.end());
1293
4
            buffer.append(rstr.begin(), rstr.end());
1294
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1295
4
                                        res_offsets);
1296
4
        }
1297
6
    }
1298
    static void scalar_vector(FunctionContext* context, const StringRef& lstr, const Chars& rdata,
1299
                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
1300
8
                              NullMap& null_map_data) {
1301
8
        size_t input_rows_count = roffsets.size();
1302
8
        res_offsets.resize(input_rows_count);
1303
8
        fmt::memory_buffer buffer;
1304
1305
16
        for (size_t i = 0; i < input_rows_count; ++i) {
1306
8
            buffer.clear();
1307
1308
8
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1309
8
                                       roffsets[i] - roffsets[i - 1]);
1310
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1311
8
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1312
8
                    rstr.begin(), rstr.end(), 2);
1313
1314
8
            if (char_len != 1) {
1315
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1316
2
                continue;
1317
2
            }
1318
6
            if (str_end_with(lstr, rstr)) {
1319
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1320
2
                continue;
1321
2
            }
1322
1323
4
            buffer.append(lstr.begin(), lstr.end());
1324
4
            buffer.append(rstr.begin(), rstr.end());
1325
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1326
4
                                        res_offsets);
1327
4
        }
1328
8
    }
1329
};
1330
1331
/// Policy tag for FunctionStringPad: selects the "lpad" flavour.
struct StringLPad {
    // SQL-visible function name.
    static constexpr const char* name = "lpad";
    // True: this tag denotes the left-padding variant.
    static constexpr bool is_lpad = true;
};
1335
1336
/// Policy tag for FunctionStringPad: selects the "rpad" flavour.
struct StringRPad {
    // SQL-visible function name.
    static constexpr const char* name = "rpad";
    // False: this tag denotes the right-padding variant.
    static constexpr bool is_lpad = false;
};
1340
1341
// Alias templates that bind StringFunctionImpl to one operator type each (StartsWithOp,
// EndsWithOp, FindInSetOp). They are used below as the implementation template argument of
// FunctionBinaryToType.
template <typename LeftDataType, typename RightDataType>
1342
using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;
1343
1344
template <typename LeftDataType, typename RightDataType>
1345
using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;
1346
1347
template <typename LeftDataType, typename RightDataType>
1348
using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
1349
1350
// Concrete unary function types, ready to be registered with the function factory.
// Each one pairs an implementation type with its name type through FunctionUnaryToType.
1351
using FunctionStringParseDataSize = FunctionUnaryToType<ParseDataSize, NameParseDataSize>;
1352
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
1353
using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLength>;
1354
using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
1355
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
1356
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
1357
using FunctionIsValidUTF8 = FunctionUnaryToType<IsValidUTF8Impl, NameIsValidUTF8>;
1358
1359
/// Function type for NameStartsWith, built on
/// FunctionBinaryToType<DataTypeString, DataTypeString, StringStartsWithImpl, NameStartsWith>.
///
/// In addition to what the base provides, it supports ZoneMap pruning for the
/// `starts_with(slot, literal)` shape: a zone whose [min, max] range cannot contain any string
/// with the given prefix is reported as kNoMatch.
class FunctionStringStartsWith : public FunctionBinaryToType<DataTypeString, DataTypeString,
                                                             StringStartsWithImpl, NameStartsWith> {
public:
    static FunctionPtr create() { return std::make_shared<FunctionStringStartsWith>(); }

    /// Decides whether a zone can contain a row matching `starts_with(slot, prefix)`.
    ///
    /// Returns kNoMatch when the zone has no non-NULL value, when its max is below the prefix,
    /// or when its min is not below the next byte string after the prefix; kMayMatch otherwise.
    /// Falls back to unsupported_zonemap_filter(ctx) whenever the arguments or the zone
    /// statistics cannot be used.
    ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx,
                                                const VExprSPtrs& arguments) const override {
        auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments);
        // Defensive guard: these are the shapes can_evaluate_zonemap_filter() rejects. Without
        // it, a caller that skips that check would dereference an empty optional, treat a
        // left-hand literal as the prefix, or read a NULL literal as a string.
        if (!slot_literal.has_value() || slot_literal->literal_on_left ||
            slot_literal->literal.is_null()) {
            return unsupported_zonemap_filter(ctx);
        }

        auto slot_type = expr_zonemap::fetch_compatible_slot_type(ctx, slot_literal->slot_index,
                                                                  slot_literal->slot_type);
        if (slot_type == nullptr) {
            return unsupported_zonemap_filter(ctx);
        }
        auto zone_map_ref = ctx.zone_map(slot_literal->slot_index);
        if (zone_map_ref == nullptr) {
            return unsupported_zonemap_filter(ctx);
        }
        const auto& zone_map = *zone_map_ref;
        // A zone without any non-NULL value cannot satisfy the predicate.
        if (!zone_map.has_not_null) {
            return ZoneMapFilterResult::kNoMatch;
        }
        if (!expr_zonemap::range_stats_usable_for_zonemap(zone_map, slot_type)) {
            return unsupported_zonemap_filter(ctx);
        }

        // Matching strings s satisfy prefix <= s, so a zone whose max is below the prefix
        // holds no match.
        const auto prefix = slot_literal->literal.as_string_view();
        auto lower = Field::create_field<TYPE_STRING>(std::string(prefix));
        if (zone_map.max_value < lower) {
            return ZoneMapFilterResult::kNoMatch;
        }
        // Matching strings also satisfy s < next_prefix(prefix). When no such upper bound
        // exists (nullopt), only the lower-bound check above applies.
        auto upper_prefix = _next_prefix_for_starts_with_zonemap(prefix);
        if (upper_prefix.has_value() &&
            !(zone_map.min_value < Field::create_field<TYPE_STRING>(*upper_prefix))) {
            return ZoneMapFilterResult::kNoMatch;
        }
        return ZoneMapFilterResult::kMayMatch;
    }

    /// Returns true only for `starts_with(slot, literal)` with a non-NULL, non-empty literal.
    bool can_evaluate_zonemap_filter(const VExprSPtrs& arguments) const override {
        auto slot_literal = expr_zonemap::extract_slot_and_literal(arguments);
        if (!slot_literal.has_value() || slot_literal->literal_on_left) {
            return false;
        }

        // A NULL prefix makes starts_with(slot, NULL) evaluate to NULL. An empty prefix matches
        // every non-NULL string and cannot prune by range. Reject both shapes here before
        // evaluate_zonemap_filter is called.
        if (slot_literal->literal.is_null()) {
            return false;
        }

        DORIS_CHECK(slot_literal->slot_type != nullptr);
        DORIS_CHECK(slot_literal->literal_type != nullptr);
        DORIS_CHECK(is_string_type(remove_nullable(slot_literal->slot_type)->get_primitive_type()));
        DORIS_CHECK(
                is_string_type(remove_nullable(slot_literal->literal_type)->get_primitive_type()));

        const auto prefix = slot_literal->literal.as_string_view();
        return !prefix.empty();
    }

private:
    /// Returns the smallest byte string that is greater than every string starting with
    /// `prefix`, or std::nullopt when there is none (empty prefix, or all bytes are 0xFF).
    static std::optional<std::string> _next_prefix_for_starts_with_zonemap(
            std::string_view prefix) {
        // ZoneMap string bounds are compared by bytewise Field ordering. For starts_with(s, p),
        // the safe upper bound is the next byte string after p: p <= s < next_prefix(p).
        // For example, starts_with(s, "ab") can use the range "ab" <= s < "ac".
        constexpr char max_byte = static_cast<char>(std::numeric_limits<unsigned char>::max());
        // Trailing 0xFF bytes cannot be incremented; drop them and bump the last byte that can.
        const auto last_incrementable = prefix.find_last_not_of(max_byte);
        if (last_incrementable == std::string_view::npos) {
            return std::nullopt;
        }
        std::string upper(prefix.substr(0, last_incrementable + 1));
        upper.back() = static_cast<char>(static_cast<unsigned char>(upper.back()) + 1);
        return upper;
    }
};
1438
1439
// Concrete function types built from the implementation types above. Each alias fixes the
// template arguments of a generic function wrapper so the result can be passed to
// SimpleFunctionFactory::register_function<>() in register_function_string().

// Binary functions taking two string arguments.
using FunctionStringEndsWith =
1440
        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
1441
using FunctionStringInstr =
1442
        FunctionBinaryToType<DataTypeString, DataTypeString, StringInStrImpl, NameInstr>;
1443
using FunctionStringLocate =
1444
        FunctionBinaryToType<DataTypeString, DataTypeString, StringLocateImpl, NameLocate>;
1445
using FunctionStringFindInSet =
1446
        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;
1447
1448
// String-to-string functions.
using FunctionQuote = FunctionStringToString<NameQuoteImpl, NameQuote>;
1449
1450
using FunctionToLower = FunctionStringToString<TransferImpl<NameToLower>, NameToLower>;
1451
1452
using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameToUpper>;
1453
1454
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
1455
1456
// Encoding / decoding functions. UnHex is instantiated twice: once with UnHexImplEmpty and
// `false`, once with UnHexImplNull and `true`.
using FunctionUnHex = FunctionStringEncode<UnHexImpl<UnHexImplEmpty>, false>;
1457
using FunctionUnHexNullable = FunctionStringEncode<UnHexImpl<UnHexImplNull>, true>;
1458
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>;
1459
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
1460
1461
using FunctionStringAppendTrailingCharIfAbsent =
1462
        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;
1463
1464
// lpad / rpad share FunctionStringPad and differ only in the tag type.
using FunctionStringLPad = FunctionStringPad<StringLPad>;
1465
using FunctionStringRPad = FunctionStringPad<StringRPad>;
1466
1467
// Registration entry points that are only declared here; their definitions are not in this
// chunk. register_function_string() below calls each of them once.
extern void register_function_string_basic(SimpleFunctionFactory& factory);
1468
extern void register_function_string_digest(SimpleFunctionFactory& factory);
1469
extern void register_function_string_mask(SimpleFunctionFactory& factory);
1470
extern void register_function_string_misc(SimpleFunctionFactory& factory);
1471
extern void register_function_string_search(SimpleFunctionFactory& factory);
1472
extern void register_function_string_url(SimpleFunctionFactory& factory);
1473
1474
8
// Registers the string functions with `factory` in three steps:
//   1. calls the six register_function_string_* entry points;
//   2. registers the function types named in this function via register_function<>();
//   3. registers alternative names via register_alias().
void register_function_string(SimpleFunctionFactory& factory) {
1475
8
    // Step 1: the six separately declared registration entry points.
    register_function_string_basic(factory);
1476
8
    register_function_string_digest(factory);
1477
8
    register_function_string_mask(factory);
1478
8
    register_function_string_misc(factory);
1479
8
    register_function_string_search(factory);
1480
8
    register_function_string_url(factory);
1481
1482
8
    // Step 2: individual function types.
    factory.register_function<FunctionStringParseDataSize>();
1483
8
    factory.register_function<FunctionStringASCII>();
1484
8
    factory.register_function<FunctionStringLength>();
1485
8
    factory.register_function<FunctionCrc32>();
1486
8
    factory.register_function<FunctionStringUTF8Length>();
1487
8
    factory.register_function<FunctionStringSpace>();
1488
8
    factory.register_function<FunctionStringStartsWith>();
1489
8
    factory.register_function<FunctionStringEndsWith>();
1490
8
    factory.register_function<FunctionStringInstr>();
1491
8
    factory.register_function<FunctionStringFindInSet>();
1492
8
    factory.register_function<FunctionStringLocate>();
1493
8
    factory.register_function<FunctionQuote>();
1494
8
    factory.register_function<FunctionReverseCommon>();
1495
8
    factory.register_function<FunctionUnHex>();
1496
8
    factory.register_function<FunctionUnHexNullable>();
1497
8
    factory.register_function<FunctionToLower>();
1498
8
    factory.register_function<FunctionToUpper>();
1499
8
    factory.register_function<FunctionToInitcap>();
1500
8
    // Trim family: Trim1Impl and Trim2Impl, each instantiated for the NameTrim/NameLTrim/
    // NameRTrim names and again for the NameTrimIn/NameLTrimIn/NameRTrimIn names.
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrim>>>();
1501
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrim>>>();
1502
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrim>>>();
1503
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrim>>>();
1504
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrim>>>();
1505
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrim>>>();
1506
8
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrimIn>>>();
1507
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrimIn>>>();
1508
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrimIn>>>();
1509
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrimIn>>>();
1510
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrimIn>>>();
1511
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrimIn>>>();
1512
8
    factory.register_function<FunctionStringConcat>();
1513
8
    factory.register_function<FunctionStringElt>();
1514
8
    factory.register_function<FunctionStringConcatWs>();
1515
8
    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
1516
8
    factory.register_function<FunctionStringRepeat>();
1517
8
    factory.register_function<FunctionStringLPad>();
1518
8
    factory.register_function<FunctionStringRPad>();
1519
8
    factory.register_function<FunctionToBase64>();
1520
8
    factory.register_function<FunctionFromBase64>();
1521
8
    // FunctionMoneyFormat: one registration per implementation type (double, int64, int128,
    // and the five decimal primitive types).
    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
1522
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
1523
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
1524
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMALV2>>>();
1525
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL32>>>();
1526
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL64>>>();
1527
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL128I>>>();
1528
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL256>>>();
1529
8
    // FunctionStringFormatRound: the same set of implementation types as FunctionMoneyFormat.
    factory.register_function<FunctionStringFormatRound<FormatRoundDoubleImpl>>();
1530
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt64Impl>>();
1531
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt128Impl>>();
1532
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMALV2>>>();
1533
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL32>>>();
1534
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL64>>>();
1535
8
    factory.register_function<
1536
8
            FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL128I>>>();
1537
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL256>>>();
1538
8
    factory.register_function<FunctionReplace<ReplaceImpl, true>>();
1539
8
    factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
1540
8
    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
1541
8
    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
1542
8
    factory.register_function<FunctionOverlay>();
1543
8
    factory.register_function<FunctionIsValidUTF8>();
1544
1545
8
    // Step 3: aliases. The first argument is the `name` of an already registered function
    // type; the second is the additional name passed to register_alias().
    factory.register_alias(FunctionIsValidUTF8::name, "isValidUTF8");
1546
8
    factory.register_alias(FunctionToLower::name, "lcase");
1547
8
    factory.register_alias(FunctionToUpper::name, "ucase");
1548
8
    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
1549
8
    factory.register_alias(FunctionStringLength::name, "octet_length");
1550
8
    factory.register_alias(FunctionOverlay::name, "insert");
1551
8
}
1552
1553
} // namespace doris