Coverage Report

Created: 2026-05-29 11:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <ctype.h>
19
#include <math.h>
20
#include <re2/stringpiece.h>
21
#include <unicode/schriter.h>
22
#include <unicode/uchar.h>
23
#include <unicode/unistr.h>
24
#include <unicode/ustream.h>
25
26
#include <bitset>
27
#include <cstddef>
28
#include <cstdint>
29
#include <string_view>
30
31
#include "common/cast_set.h"
32
#include "common/status.h"
33
#include "core/column/column.h"
34
#include "core/column/column_string.h"
35
#include "core/pod_array_fwd.h"
36
#include "core/string_ref.h"
37
#include "exprs/function/function_reverse.h"
38
#include "exprs/function/function_string_concat.h"
39
#include "exprs/function/function_string_format.h"
40
#include "exprs/function/function_string_replace.h"
41
#include "exprs/function/function_string_to_string.h"
42
#include "exprs/function/function_totype.h"
43
#include "exprs/function/simple_function_factory.h"
44
#include "exprs/function/string_hex_util.h"
45
#include "util/string_search.hpp"
46
#include "util/url_coding.h"
47
#include "util/utf8_check.h"
48
49
namespace doris {
50
struct NameStringASCII {
51
    static constexpr auto name = "ascii";
52
};
53
54
struct StringASCII {
55
    using ReturnType = DataTypeInt32;
56
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
57
    using Type = String;
58
    using ReturnColumnType = ColumnInt32;
59
60
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
61
38
                         PaddedPODArray<Int32>& res) {
62
38
        auto size = offsets.size();
63
38
        res.resize(size);
64
86
        for (int i = 0; i < size; ++i) {
65
48
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
66
48
            res[i] = (offsets[i] == offsets[i - 1]) ? 0 : static_cast<uint8_t>(raw_str[0]);
67
48
        }
68
38
        return Status::OK();
69
38
    }
70
};
71
72
struct NameParseDataSize {
73
    static constexpr auto name = "parse_data_size";
74
};
75
76
static const std::map<std::string_view, Int128> UNITS = {
77
        {"B", static_cast<Int128>(1)},        {"kB", static_cast<Int128>(1) << 10},
78
        {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30},
79
        {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50},
80
        {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70},
81
        {"YB", static_cast<Int128>(1) << 80}};
82
83
struct ParseDataSize {
84
    using ReturnType = DataTypeInt128;
85
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
86
    using Type = String;
87
    using ReturnColumnType = ColumnInt128;
88
89
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
90
48
                         PaddedPODArray<Int128>& res) {
91
48
        auto size = offsets.size();
92
48
        res.resize(size);
93
100
        for (int i = 0; i < size; ++i) {
94
52
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
95
52
            int str_size = offsets[i] - offsets[i - 1];
96
52
            res[i] = parse_data_size(std::string_view(raw_str, str_size));
97
52
        }
98
48
        return Status::OK();
99
48
    }
100
101
52
    static Int128 parse_data_size(const std::string_view& dataSize) {
102
52
        int digit_length = 0;
103
216
        for (char c : dataSize) {
104
216
            if (isdigit(c) || c == '.') {
105
166
                digit_length++;
106
166
            } else {
107
50
                break;
108
50
            }
109
216
        }
110
111
52
        if (digit_length == 0) {
112
4
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
113
4
                                   "Invalid Input argument \"{}\" of function parse_data_size",
114
4
                                   dataSize);
115
4
        }
116
        // 123.45MB--->123.45 : MB
117
48
        double value = 0.0;
118
48
        try {
119
48
            value = std::stod(std::string(dataSize.substr(0, digit_length)));
120
48
        } catch (const std::exception& e) {
121
0
            throw doris::Exception(
122
0
                    ErrorCode::INVALID_ARGUMENT,
123
0
                    "Invalid Input argument \"{}\" of function parse_data_size, error: {}",
124
0
                    dataSize, e.what());
125
0
        }
126
48
        auto unit = dataSize.substr(digit_length);
127
48
        auto it = UNITS.find(unit);
128
48
        if (it != UNITS.end()) {
129
45
            return static_cast<__int128>(static_cast<long double>(it->second) * value);
130
45
        } else {
131
3
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
132
3
                                   "Invalid Input argument \"{}\" of function parse_data_size",
133
3
                                   dataSize);
134
3
        }
135
48
    }
136
};
137
138
struct NameQuote {
139
    static constexpr auto name = "quote";
140
};
141
142
struct NameQuoteImpl {
143
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
144
17
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
145
17
        size_t offset_size = offsets.size();
146
17
        ColumnString::Offset pos = 0;
147
17
        res_offsets.resize(offset_size);
148
17
        res_data.resize(data.size() + offset_size * 2);
149
45
        for (int i = 0; i < offset_size; i++) {
150
28
            const unsigned char* raw_str = &data[offsets[i - 1]];
151
28
            ColumnString::Offset size = offsets[i] - offsets[i - 1];
152
28
            res_data[pos] = '\'';
153
28
            std::memcpy(res_data.data() + pos + 1, raw_str, size);
154
28
            res_data[pos + size + 1] = '\'';
155
28
            pos += size + 2;
156
28
            res_offsets[i] = pos;
157
28
        }
158
17
        return Status::OK();
159
17
    }
160
};
161
162
struct NameStringLength {
163
    static constexpr auto name = "length";
164
};
165
166
struct StringLengthImpl {
167
    using ReturnType = DataTypeInt32;
168
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
169
    using Type = String;
170
    using ReturnColumnType = ColumnInt32;
171
172
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
173
810
                         PaddedPODArray<Int32>& res) {
174
810
        auto size = offsets.size();
175
810
        res.resize(size);
176
278k
        for (int i = 0; i < size; ++i) {
177
277k
            int str_size = offsets[i] - offsets[i - 1];
178
277k
            res[i] = str_size;
179
277k
        }
180
810
        return Status::OK();
181
810
    }
182
};
183
184
struct NameCrc32 {
185
    static constexpr auto name = "crc32";
186
};
187
188
struct Crc32Impl {
189
    using ReturnType = DataTypeInt64;
190
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
191
    using Type = String;
192
    using ReturnColumnType = ColumnInt64;
193
194
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
195
3
                         PaddedPODArray<Int64>& res) {
196
3
        auto size = offsets.size();
197
3
        res.resize(size);
198
6
        for (int i = 0; i < size; ++i) {
199
3
            res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1],
200
3
                             offsets[i] - offsets[i - 1]);
201
3
        }
202
3
        return Status::OK();
203
3
    }
204
};
205
206
struct NameStringUtf8Length {
207
    static constexpr auto name = "char_length";
208
};
209
210
struct StringUtf8LengthImpl {
211
    using ReturnType = DataTypeInt32;
212
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
213
    using Type = String;
214
    using ReturnColumnType = ColumnInt32;
215
216
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
217
42
                         PaddedPODArray<Int32>& res) {
218
42
        auto size = offsets.size();
219
42
        res.resize(size);
220
98
        for (int i = 0; i < size; ++i) {
221
56
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
222
56
            int str_size = offsets[i] - offsets[i - 1];
223
56
            res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size);
224
56
        }
225
42
        return Status::OK();
226
42
    }
227
};
228
229
struct NameIsValidUTF8 {
230
    static constexpr auto name = "is_valid_utf8";
231
};
232
233
struct IsValidUTF8Impl {
234
    using ReturnType = DataTypeUInt8;
235
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
236
    using Type = String;
237
    using ReturnColumnType = ColumnUInt8;
238
239
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
240
39
                         PaddedPODArray<UInt8>& res) {
241
39
        auto size = offsets.size();
242
39
        res.resize(size);
243
98
        for (size_t i = 0; i < size; ++i) {
244
59
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
245
59
            size_t str_size = offsets[i] - offsets[i - 1];
246
59
            res[i] = validate_utf8(raw_str, str_size) ? 1 : 0;
247
59
        }
248
39
        return Status::OK();
249
39
    }
250
};
251
252
struct NameStartsWith {
253
    static constexpr auto name = "starts_with";
254
};
255
256
struct StartsWithOp {
257
    using ResultDataType = DataTypeUInt8;
258
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
259
260
84
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
261
84
        res = strl.starts_with(strr);
262
84
    }
263
};
264
265
struct NameEndsWith {
266
    static constexpr auto name = "ends_with";
267
};
268
269
struct EndsWithOp {
270
    using ResultDataType = DataTypeUInt8;
271
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
272
273
92
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
274
92
        res = strl.ends_with(strr);
275
92
    }
276
};
277
278
struct NameFindInSet {
279
    static constexpr auto name = "find_in_set";
280
};
281
282
struct FindInSetOp {
283
    using ResultDataType = DataTypeInt32;
284
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
285
119
    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
286
273
        for (const auto& c : strl) {
287
273
            if (c == ',') {
288
21
                res = 0;
289
21
                return;
290
21
            }
291
273
        }
292
293
98
        int32_t token_index = 1;
294
98
        int32_t start = 0;
295
98
        int32_t end;
296
297
202
        do {
298
202
            end = start;
299
            // Position end.
300
608
            while (end < strr.length() && strr[end] != ',') {
301
406
                ++end;
302
406
            }
303
304
202
            if (strl == std::string_view {strr.data() + start, (size_t)end - start}) {
305
43
                res = token_index;
306
43
                return;
307
43
            }
308
309
            // Re-position start and end past ','
310
159
            start = end + 1;
311
159
            ++token_index;
312
159
        } while (start < strr.length());
313
55
        res = 0;
314
55
    }
315
};
316
317
struct NameInstr {
318
    static constexpr auto name = "instr";
319
};
320
321
// LeftDataType and RightDataType are DataTypeString
322
template <typename LeftDataType, typename RightDataType>
323
struct StringInStrImpl {
324
    using ResultDataType = DataTypeInt32;
325
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
326
327
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
328
72
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
329
72
        StringRef lstr_ref(ldata.data, ldata.size);
330
331
72
        auto size = roffsets.size();
332
72
        res.resize(size);
333
144
        for (int i = 0; i < size; ++i) {
334
72
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
335
72
            int r_str_size = roffsets[i] - roffsets[i - 1];
336
337
72
            StringRef rstr_ref(r_raw_str, r_str_size);
338
339
72
            res[i] = execute(lstr_ref, rstr_ref);
340
72
        }
341
342
72
        return Status::OK();
343
72
    }
344
345
    static Status vector_scalar(const ColumnString::Chars& ldata,
346
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
347
86
                                ResultPaddedPODArray& res) {
348
86
        auto size = loffsets.size();
349
86
        res.resize(size);
350
351
86
        if (rdata.size == 0) {
352
12
            std::fill(res.begin(), res.end(), 1);
353
12
            return Status::OK();
354
12
        }
355
356
74
        const UInt8* begin = ldata.data();
357
74
        const UInt8* end = begin + ldata.size();
358
74
        const UInt8* pos = begin;
359
360
        /// Current index in the array of strings.
361
74
        size_t i = 0;
362
74
        std::fill(res.begin(), res.end(), 0);
363
364
74
        StringRef rstr_ref(rdata.data, rdata.size);
365
74
        StringSearch search(&rstr_ref);
366
367
90
        while (pos < end) {
368
            // search return matched substring start offset
369
64
            pos = (UInt8*)search.search((char*)pos, end - pos);
370
64
            if (pos >= end) {
371
48
                break;
372
48
            }
373
374
            /// Determine which index it refers to.
375
            /// begin + value_offsets[i] is the start offset of string at i+1
376
16
            while (begin + loffsets[i] < pos) {
377
0
                ++i;
378
0
            }
379
380
            /// We check that the entry does not pass through the boundaries of strings.
381
16
            if (pos + rdata.size <= begin + loffsets[i]) {
382
16
                int loc = (int)(pos - begin) - loffsets[i - 1];
383
16
                int l_str_size = loffsets[i] - loffsets[i - 1];
384
16
                auto len = std::min(l_str_size, loc);
385
16
                loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len);
386
16
                res[i] = loc + 1;
387
16
            }
388
389
            // move to next string offset
390
16
            pos = begin + loffsets[i];
391
16
            ++i;
392
16
        }
393
394
74
        return Status::OK();
395
86
    }
396
397
    static Status vector_vector(const ColumnString::Chars& ldata,
398
                                const ColumnString::Offsets& loffsets,
399
                                const ColumnString::Chars& rdata,
400
143
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
401
143
        DCHECK_EQ(loffsets.size(), roffsets.size());
402
403
143
        auto size = loffsets.size();
404
143
        res.resize(size);
405
397
        for (int i = 0; i < size; ++i) {
406
254
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
407
254
            int l_str_size = loffsets[i] - loffsets[i - 1];
408
254
            StringRef lstr_ref(l_raw_str, l_str_size);
409
410
254
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
411
254
            int r_str_size = roffsets[i] - roffsets[i - 1];
412
254
            StringRef rstr_ref(r_raw_str, r_str_size);
413
414
254
            res[i] = execute(lstr_ref, rstr_ref);
415
254
        }
416
417
143
        return Status::OK();
418
143
    }
419
420
326
    static int execute(const StringRef& strl, const StringRef& strr) {
421
326
        if (strr.size == 0) {
422
71
            return 1;
423
71
        }
424
425
255
        StringSearch search(&strr);
426
        // Hive returns positions starting from 1.
427
255
        int loc = search.search(&strl);
428
255
        if (loc > 0) {
429
43
            int len = std::min(loc, (int)strl.size);
430
43
            loc = simd::VStringFunctions::get_char_len(strl.data, len);
431
43
        }
432
433
255
        return loc + 1;
434
326
    }
435
};
436
437
// the same impl as instr
438
struct NameLocate {
439
    static constexpr auto name = "locate";
440
};
441
442
// LeftDataType and RightDataType are DataTypeString
443
template <typename LeftDataType, typename RightDataType>
444
struct StringLocateImpl {
445
    using ResultDataType = DataTypeInt32;
446
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
447
448
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
449
38
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
450
38
        return StringInStrImpl<LeftDataType, RightDataType>::vector_scalar(rdata, roffsets, ldata,
451
38
                                                                           res);
452
38
    }
453
454
    static Status vector_scalar(const ColumnString::Chars& ldata,
455
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
456
36
                                ResultPaddedPODArray& res) {
457
36
        return StringInStrImpl<LeftDataType, RightDataType>::scalar_vector(rdata, ldata, loffsets,
458
36
                                                                           res);
459
36
    }
460
461
    static Status vector_vector(const ColumnString::Chars& ldata,
462
                                const ColumnString::Offsets& loffsets,
463
                                const ColumnString::Chars& rdata,
464
78
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
465
78
        return StringInStrImpl<LeftDataType, RightDataType>::vector_vector(rdata, roffsets, ldata,
466
78
                                                                           loffsets, res);
467
78
    }
468
};
469
470
// LeftDataType and RightDataType are DataTypeString
471
template <typename LeftDataType, typename RightDataType, typename OP>
472
struct StringFunctionImpl {
473
    using ResultDataType = typename OP::ResultDataType;
474
    using ResultPaddedPODArray = typename OP::ResultPaddedPODArray;
475
476
    static Status vector_vector(const ColumnString::Chars& ldata,
477
                                const ColumnString::Offsets& loffsets,
478
                                const ColumnString::Chars& rdata,
479
163
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
163
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
163
        auto size = loffsets.size();
483
163
        res.resize(size);
484
374
        for (int i = 0; i < size; ++i) {
485
211
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
211
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
211
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
211
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
211
            std::string_view lview(l_raw_str, l_str_size);
492
211
            std::string_view rview(r_raw_str, r_str_size);
493
494
211
            OP::execute(lview, rview, res[i]);
495
211
        }
496
163
        return Status::OK();
497
163
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
479
71
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
71
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
71
        auto size = loffsets.size();
483
71
        res.resize(size);
484
147
        for (int i = 0; i < size; ++i) {
485
76
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
76
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
76
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
76
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
76
            std::string_view lview(l_raw_str, l_str_size);
492
76
            std::string_view rview(r_raw_str, r_str_size);
493
494
76
            OP::execute(lview, rview, res[i]);
495
76
        }
496
71
        return Status::OK();
497
71
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
479
45
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
45
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
45
        auto size = loffsets.size();
483
45
        res.resize(size);
484
109
        for (int i = 0; i < size; ++i) {
485
64
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
64
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
64
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
64
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
64
            std::string_view lview(l_raw_str, l_str_size);
492
64
            std::string_view rview(r_raw_str, r_str_size);
493
494
64
            OP::execute(lview, rview, res[i]);
495
64
        }
496
45
        return Status::OK();
497
45
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
479
47
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
47
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
47
        auto size = loffsets.size();
483
47
        res.resize(size);
484
118
        for (int i = 0; i < size; ++i) {
485
71
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
71
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
71
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
71
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
71
            std::string_view lview(l_raw_str, l_str_size);
492
71
            std::string_view rview(r_raw_str, r_str_size);
493
494
71
            OP::execute(lview, rview, res[i]);
495
71
        }
496
47
        return Status::OK();
497
47
    }
498
    static Status vector_scalar(const ColumnString::Chars& ldata,
499
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
500
34
                                ResultPaddedPODArray& res) {
501
34
        auto size = loffsets.size();
502
34
        res.resize(size);
503
34
        std::string_view rview(rdata.data, rdata.size);
504
68
        for (int i = 0; i < size; ++i) {
505
34
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
34
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
34
            std::string_view lview(l_raw_str, l_str_size);
508
509
34
            OP::execute(lview, rview, res[i]);
510
34
        }
511
34
        return Status::OK();
512
34
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
500
4
                                ResultPaddedPODArray& res) {
501
4
        auto size = loffsets.size();
502
4
        res.resize(size);
503
4
        std::string_view rview(rdata.data, rdata.size);
504
8
        for (int i = 0; i < size; ++i) {
505
4
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
4
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
4
            std::string_view lview(l_raw_str, l_str_size);
508
509
4
            OP::execute(lview, rview, res[i]);
510
4
        }
511
4
        return Status::OK();
512
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
500
14
                                ResultPaddedPODArray& res) {
501
14
        auto size = loffsets.size();
502
14
        res.resize(size);
503
14
        std::string_view rview(rdata.data, rdata.size);
504
28
        for (int i = 0; i < size; ++i) {
505
14
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
14
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
14
            std::string_view lview(l_raw_str, l_str_size);
508
509
14
            OP::execute(lview, rview, res[i]);
510
14
        }
511
14
        return Status::OK();
512
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
500
16
                                ResultPaddedPODArray& res) {
501
16
        auto size = loffsets.size();
502
16
        res.resize(size);
503
16
        std::string_view rview(rdata.data, rdata.size);
504
32
        for (int i = 0; i < size; ++i) {
505
16
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
16
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
16
            std::string_view lview(l_raw_str, l_str_size);
508
509
16
            OP::execute(lview, rview, res[i]);
510
16
        }
511
16
        return Status::OK();
512
16
    }
513
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
514
44
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
44
        auto size = roffsets.size();
516
44
        res.resize(size);
517
44
        std::string_view lview(ldata.data, ldata.size);
518
94
        for (int i = 0; i < size; ++i) {
519
50
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
50
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
50
            std::string_view rview(r_raw_str, r_str_size);
522
523
50
            OP::execute(lview, rview, res[i]);
524
50
        }
525
44
        return Status::OK();
526
44
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
514
4
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
4
        auto size = roffsets.size();
516
4
        res.resize(size);
517
4
        std::string_view lview(ldata.data, ldata.size);
518
8
        for (int i = 0; i < size; ++i) {
519
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
4
            std::string_view rview(r_raw_str, r_str_size);
522
523
4
            OP::execute(lview, rview, res[i]);
524
4
        }
525
4
        return Status::OK();
526
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
514
14
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
14
        auto size = roffsets.size();
516
14
        res.resize(size);
517
14
        std::string_view lview(ldata.data, ldata.size);
518
28
        for (int i = 0; i < size; ++i) {
519
14
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
14
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
14
            std::string_view rview(r_raw_str, r_str_size);
522
523
14
            OP::execute(lview, rview, res[i]);
524
14
        }
525
14
        return Status::OK();
526
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERNS7_IiLm4096ESA_Lm16ELm15EEE
Line
Count
Source
514
26
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
26
        auto size = roffsets.size();
516
26
        res.resize(size);
517
26
        std::string_view lview(ldata.data, ldata.size);
518
58
        for (int i = 0; i < size; ++i) {
519
32
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
32
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
32
            std::string_view rview(r_raw_str, r_str_size);
522
523
32
            OP::execute(lview, rview, res[i]);
524
32
        }
525
26
        return Status::OK();
526
26
    }
527
};
528
529
struct NameToLower {
530
    static constexpr auto name = "lower";
531
};
532
533
struct NameToUpper {
534
    static constexpr auto name = "upper";
535
};
536
537
template <typename OpName>
538
struct TransferImpl {
539
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
540
247
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
541
247
        size_t offset_size = offsets.size();
542
247
        if (UNLIKELY(!offset_size)) {
543
0
            return Status::OK();
544
0
        }
545
546
247
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
547
247
        res_offsets.resize(offset_size);
548
247
        if (is_ascii) {
549
188
            memcpy_small_allow_read_write_overflow15(
550
188
                    res_offsets.data(), offsets.data(),
551
188
                    offset_size * sizeof(ColumnString::Offsets::value_type));
552
553
188
            size_t data_length = data.size();
554
188
            res_data.resize(data_length);
555
188
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
556
53
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
557
135
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
558
135
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
559
135
            }
560
188
        } else {
561
59
            execute_utf8(data, offsets, res_data, res_offsets);
562
59
        }
563
564
247
        return Status::OK();
565
247
    }
_ZN5doris12TransferImplINS_11NameToLowerEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
540
156
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
541
156
        size_t offset_size = offsets.size();
542
156
        if (UNLIKELY(!offset_size)) {
543
0
            return Status::OK();
544
0
        }
545
546
156
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
547
156
        res_offsets.resize(offset_size);
548
156
        if (is_ascii) {
549
135
            memcpy_small_allow_read_write_overflow15(
550
135
                    res_offsets.data(), offsets.data(),
551
135
                    offset_size * sizeof(ColumnString::Offsets::value_type));
552
553
135
            size_t data_length = data.size();
554
135
            res_data.resize(data_length);
555
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
556
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
557
135
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
558
135
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
559
135
            }
560
135
        } else {
561
21
            execute_utf8(data, offsets, res_data, res_offsets);
562
21
        }
563
564
156
        return Status::OK();
565
156
    }
_ZN5doris12TransferImplINS_11NameToUpperEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
540
91
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
541
91
        size_t offset_size = offsets.size();
542
91
        if (UNLIKELY(!offset_size)) {
543
0
            return Status::OK();
544
0
        }
545
546
91
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
547
91
        res_offsets.resize(offset_size);
548
91
        if (is_ascii) {
549
53
            memcpy_small_allow_read_write_overflow15(
550
53
                    res_offsets.data(), offsets.data(),
551
53
                    offset_size * sizeof(ColumnString::Offsets::value_type));
552
553
53
            size_t data_length = data.size();
554
53
            res_data.resize(data_length);
555
53
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
556
53
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
557
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
558
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
559
            }
560
53
        } else {
561
38
            execute_utf8(data, offsets, res_data, res_offsets);
562
38
        }
563
564
91
        return Status::OK();
565
91
    }
566
567
    static void execute_utf8(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
568
60
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
569
60
        std::string result;
570
198
        for (int64_t i = 0; i < offsets.size(); ++i) {
571
138
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
572
138
            uint32_t size = offsets[i] - offsets[i - 1];
573
574
138
            result.clear();
575
138
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
576
91
                to_upper_utf8(begin, size, result);
577
91
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
578
47
                to_lower_utf8(begin, size, result);
579
47
            }
580
138
            StringOP::push_value_string(result, i, res_data, res_offsets);
581
138
        }
582
60
    }
_ZN5doris12TransferImplINS_11NameToLowerEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
568
21
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
569
21
        std::string result;
570
68
        for (int64_t i = 0; i < offsets.size(); ++i) {
571
47
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
572
47
            uint32_t size = offsets[i] - offsets[i - 1];
573
574
47
            result.clear();
575
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
576
                to_upper_utf8(begin, size, result);
577
47
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
578
47
                to_lower_utf8(begin, size, result);
579
47
            }
580
47
            StringOP::push_value_string(result, i, res_data, res_offsets);
581
47
        }
582
21
    }
_ZN5doris12TransferImplINS_11NameToUpperEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
568
39
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
569
39
        std::string result;
570
130
        for (int64_t i = 0; i < offsets.size(); ++i) {
571
91
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
572
91
            uint32_t size = offsets[i] - offsets[i - 1];
573
574
91
            result.clear();
575
91
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
576
91
                to_upper_utf8(begin, size, result);
577
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
578
                to_lower_utf8(begin, size, result);
579
            }
580
91
            StringOP::push_value_string(result, i, res_data, res_offsets);
581
91
        }
582
39
    }
583
584
91
    static void to_upper_utf8(const char* data, uint32_t size, std::string& result) {
585
91
        icu::StringPiece sp;
586
91
        sp.set(data, size);
587
91
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
588
91
        unicode_str.toUpper();
589
91
        unicode_str.toUTF8String(result);
590
91
    }
591
592
47
    static void to_lower_utf8(const char* data, uint32_t size, std::string& result) {
593
47
        icu::StringPiece sp;
594
47
        sp.set(data, size);
595
47
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
596
47
        unicode_str.toLower();
597
47
        unicode_str.toUTF8String(result);
598
47
    }
599
};
600
601
// Capitalize first letter
602
struct NameToInitcap {
603
    static constexpr auto name = "initcap";
604
};
605
606
struct InitcapImpl {
607
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
608
162
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
609
162
        res_offsets.resize(offsets.size());
610
611
162
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
612
162
        if (is_ascii) {
613
105
            impl_vectors_ascii(data, offsets, res_data, res_offsets);
614
105
        } else {
615
57
            impl_vectors_utf8(data, offsets, res_data, res_offsets);
616
57
        }
617
162
        return Status::OK();
618
162
    }
619
620
    static void impl_vectors_ascii(const ColumnString::Chars& data,
621
                                   const ColumnString::Offsets& offsets,
622
                                   ColumnString::Chars& res_data,
623
105
                                   ColumnString::Offsets& res_offsets) {
624
105
        size_t offset_size = offsets.size();
625
105
        memcpy_small_allow_read_write_overflow15(
626
105
                res_offsets.data(), offsets.data(),
627
105
                offset_size * sizeof(ColumnString::Offsets::value_type));
628
629
105
        size_t data_length = data.size();
630
105
        res_data.resize(data_length);
631
105
        simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
632
633
105
        bool need_capitalize = true;
634
210
        for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) {
635
105
            auto end_index = res_offsets[offset_index];
636
105
            need_capitalize = true;
637
638
1.29k
            for (size_t i = start_index; i < end_index; ++i) {
639
1.19k
                if (!::isalnum(res_data[i])) {
640
211
                    need_capitalize = true;
641
980
                } else if (need_capitalize) {
642
                    /*
643
                    https://en.cppreference.com/w/cpp/string/byte/toupper
644
                    Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. 
645
                    To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char:
646
                    char my_toupper(char ch)
647
                    {
648
                        return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
649
                    }
650
                    */
651
235
                    res_data[i] = static_cast<unsigned char>(::toupper(res_data[i]));
652
235
                    need_capitalize = false;
653
235
                }
654
1.19k
            }
655
656
105
            start_index = end_index;
657
105
        }
658
105
    }
659
660
    static void impl_vectors_utf8(const ColumnString::Chars& data,
661
                                  const ColumnString::Offsets& offsets,
662
                                  ColumnString::Chars& res_data,
663
57
                                  ColumnString::Offsets& res_offsets) {
664
57
        std::string result;
665
121
        for (int64_t i = 0; i < offsets.size(); ++i) {
666
64
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
667
64
            uint32_t size = offsets[i] - offsets[i - 1];
668
64
            result.clear();
669
64
            to_initcap_utf8(begin, size, result);
670
64
            StringOP::push_value_string(result, i, res_data, res_offsets);
671
64
        }
672
57
    }
673
674
64
    static void to_initcap_utf8(const char* data, uint32_t size, std::string& result) {
675
64
        icu::StringPiece sp;
676
64
        sp.set(data, size);
677
64
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
678
64
        unicode_str.toLower();
679
64
        icu::UnicodeString output_str;
680
64
        bool need_capitalize = true;
681
64
        icu::StringCharacterIterator iter(unicode_str);
682
608
        for (UChar32 ch = iter.first32(); ch != icu::CharacterIterator::DONE; ch = iter.next32()) {
683
544
            if (!u_isalnum(ch)) {
684
93
                need_capitalize = true;
685
451
            } else if (need_capitalize) {
686
83
                ch = u_toupper(ch);
687
83
                need_capitalize = false;
688
83
            }
689
544
            output_str.append(ch);
690
544
        }
691
64
        output_str.toUTF8String(result);
692
64
    }
693
};
694
695
struct NameTrim {
696
    static constexpr auto name = "trim";
697
};
698
struct NameLTrim {
699
    static constexpr auto name = "ltrim";
700
};
701
struct NameRTrim {
702
    static constexpr auto name = "rtrim";
703
};
704
struct NameTrimIn {
705
    static constexpr auto name = "trim_in";
706
};
707
struct NameLTrimIn {
708
    static constexpr auto name = "ltrim_in";
709
};
710
struct NameRTrimIn {
711
    static constexpr auto name = "rtrim_in";
712
};
713
template <bool is_ltrim, bool is_rtrim, bool trim_single>
714
struct TrimUtil {
715
    static Status vector(const ColumnString::Chars& str_data,
716
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
717
201
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
201
        const size_t offset_size = str_offsets.size();
719
201
        res_offsets.resize(offset_size);
720
201
        res_data.reserve(str_data.size());
721
450
        for (size_t i = 0; i < offset_size; ++i) {
722
249
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
249
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
249
            if constexpr (is_ltrim) {
726
132
                str_begin =
727
132
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
132
            }
729
249
            if constexpr (is_rtrim) {
730
192
                str_end =
731
192
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
192
            }
733
734
249
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
249
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
249
        }
738
201
        return Status::OK();
739
201
    }
_ZN5doris8TrimUtilILb1ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
42
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
42
        const size_t offset_size = str_offsets.size();
719
42
        res_offsets.resize(offset_size);
720
42
        res_data.reserve(str_data.size());
721
112
        for (size_t i = 0; i < offset_size; ++i) {
722
70
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
70
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
70
            if constexpr (is_ltrim) {
726
70
                str_begin =
727
70
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
70
            }
729
70
            if constexpr (is_rtrim) {
730
70
                str_end =
731
70
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
70
            }
733
734
70
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
70
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
70
        }
738
42
        return Status::OK();
739
42
    }
_ZN5doris8TrimUtilILb1ELb0ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
36
        const size_t offset_size = str_offsets.size();
719
36
        res_offsets.resize(offset_size);
720
36
        res_data.reserve(str_data.size());
721
82
        for (size_t i = 0; i < offset_size; ++i) {
722
46
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
46
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
46
            if constexpr (is_ltrim) {
726
46
                str_begin =
727
46
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
46
            }
729
            if constexpr (is_rtrim) {
730
                str_end =
731
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
            }
733
734
46
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
46
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
46
        }
738
36
        return Status::OK();
739
36
    }
_ZN5doris8TrimUtilILb0ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
62
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
62
        const size_t offset_size = str_offsets.size();
719
62
        res_offsets.resize(offset_size);
720
62
        res_data.reserve(str_data.size());
721
134
        for (size_t i = 0; i < offset_size; ++i) {
722
72
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
72
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
            if constexpr (is_ltrim) {
726
                str_begin =
727
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
            }
729
72
            if constexpr (is_rtrim) {
730
72
                str_end =
731
72
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
72
            }
733
734
72
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
72
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
72
        }
738
62
        return Status::OK();
739
62
    }
_ZN5doris8TrimUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
5
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
5
        const size_t offset_size = str_offsets.size();
719
5
        res_offsets.resize(offset_size);
720
5
        res_data.reserve(str_data.size());
721
10
        for (size_t i = 0; i < offset_size; ++i) {
722
5
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
5
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
5
            if constexpr (is_ltrim) {
726
5
                str_begin =
727
5
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
5
            }
729
5
            if constexpr (is_rtrim) {
730
5
                str_end =
731
5
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
5
            }
733
734
5
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
5
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
5
        }
738
5
        return Status::OK();
739
5
    }
_ZN5doris8TrimUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
11
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
11
        const size_t offset_size = str_offsets.size();
719
11
        res_offsets.resize(offset_size);
720
11
        res_data.reserve(str_data.size());
721
22
        for (size_t i = 0; i < offset_size; ++i) {
722
11
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
11
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
11
            if constexpr (is_ltrim) {
726
11
                str_begin =
727
11
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
11
            }
729
            if constexpr (is_rtrim) {
730
                str_end =
731
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
            }
733
734
11
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
11
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
11
        }
738
11
        return Status::OK();
739
11
    }
_ZN5doris8TrimUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
45
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
45
        const size_t offset_size = str_offsets.size();
719
45
        res_offsets.resize(offset_size);
720
45
        res_data.reserve(str_data.size());
721
90
        for (size_t i = 0; i < offset_size; ++i) {
722
45
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
45
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
            if constexpr (is_ltrim) {
726
                str_begin =
727
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
            }
729
45
            if constexpr (is_rtrim) {
730
45
                str_end =
731
45
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
45
            }
733
734
45
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
45
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
45
        }
738
45
        return Status::OK();
739
45
    }
740
};
741
template <bool is_ltrim, bool is_rtrim, bool trim_single>
742
struct TrimInUtil {
743
    static Status vector(const ColumnString::Chars& str_data,
744
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
745
121
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
121
        const size_t offset_size = str_offsets.size();
747
121
        res_offsets.resize(offset_size);
748
121
        res_data.reserve(str_data.size());
749
121
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
121
                         simd::VStringFunctions::is_ascii(StringRef(
751
76
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
121
        if (all_ascii) {
754
68
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
68
        } else {
756
53
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
53
        }
758
121
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
745
43
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
43
        const size_t offset_size = str_offsets.size();
747
43
        res_offsets.resize(offset_size);
748
43
        res_data.reserve(str_data.size());
749
43
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
43
                         simd::VStringFunctions::is_ascii(StringRef(
751
28
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
43
        if (all_ascii) {
754
24
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
24
        } else {
756
19
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
19
        }
758
43
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
745
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
36
        const size_t offset_size = str_offsets.size();
747
36
        res_offsets.resize(offset_size);
748
36
        res_data.reserve(str_data.size());
749
36
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
36
                         simd::VStringFunctions::is_ascii(StringRef(
751
21
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
36
        if (all_ascii) {
754
19
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
19
        } else {
756
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
17
        }
758
36
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
745
42
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
42
        const size_t offset_size = str_offsets.size();
747
42
        res_offsets.resize(offset_size);
748
42
        res_data.reserve(str_data.size());
749
42
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
42
                         simd::VStringFunctions::is_ascii(StringRef(
751
27
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
42
        if (all_ascii) {
754
25
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
25
        } else {
756
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
17
        }
758
42
    }
759
760
private:
761
    static Status impl_vectors_ascii(const ColumnString::Chars& str_data,
762
                                     const ColumnString::Offsets& str_offsets,
763
                                     const StringRef& remove_str, ColumnString::Chars& res_data,
764
68
                                     ColumnString::Offsets& res_offsets) {
765
68
        const size_t offset_size = str_offsets.size();
766
68
        std::bitset<128> char_lookup;
767
68
        const char* remove_begin = remove_str.data;
768
68
        const char* remove_end = remove_str.data + remove_str.size;
769
770
251
        while (remove_begin < remove_end) {
771
183
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
183
            remove_begin += 1;
773
183
        }
774
775
136
        for (size_t i = 0; i < offset_size; ++i) {
776
68
            const char* str_begin =
777
68
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
68
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
68
            const char* left_trim_pos = str_begin;
780
68
            const char* right_trim_pos = str_end;
781
782
68
            if constexpr (is_ltrim) {
783
127
                while (left_trim_pos < str_end) {
784
114
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
30
                        break;
786
30
                    }
787
84
                    ++left_trim_pos;
788
84
                }
789
43
            }
790
791
68
            if constexpr (is_rtrim) {
792
114
                while (right_trim_pos > left_trim_pos) {
793
100
                    --right_trim_pos;
794
100
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
35
                        ++right_trim_pos;
796
35
                        break;
797
35
                    }
798
100
                }
799
49
            }
800
801
68
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
68
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
68
        }
805
806
68
        return Status::OK();
807
68
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
764
24
                                     ColumnString::Offsets& res_offsets) {
765
24
        const size_t offset_size = str_offsets.size();
766
24
        std::bitset<128> char_lookup;
767
24
        const char* remove_begin = remove_str.data;
768
24
        const char* remove_end = remove_str.data + remove_str.size;
769
770
86
        while (remove_begin < remove_end) {
771
62
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
62
            remove_begin += 1;
773
62
        }
774
775
48
        for (size_t i = 0; i < offset_size; ++i) {
776
24
            const char* str_begin =
777
24
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
24
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
24
            const char* left_trim_pos = str_begin;
780
24
            const char* right_trim_pos = str_end;
781
782
24
            if constexpr (is_ltrim) {
783
57
                while (left_trim_pos < str_end) {
784
50
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
17
                        break;
786
17
                    }
787
33
                    ++left_trim_pos;
788
33
                }
789
24
            }
790
791
24
            if constexpr (is_rtrim) {
792
39
                while (right_trim_pos > left_trim_pos) {
793
32
                    --right_trim_pos;
794
32
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
17
                        ++right_trim_pos;
796
17
                        break;
797
17
                    }
798
32
                }
799
24
            }
800
801
24
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
24
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
24
        }
805
806
24
        return Status::OK();
807
24
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
764
19
                                     ColumnString::Offsets& res_offsets) {
765
19
        const size_t offset_size = str_offsets.size();
766
19
        std::bitset<128> char_lookup;
767
19
        const char* remove_begin = remove_str.data;
768
19
        const char* remove_end = remove_str.data + remove_str.size;
769
770
73
        while (remove_begin < remove_end) {
771
54
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
54
            remove_begin += 1;
773
54
        }
774
775
38
        for (size_t i = 0; i < offset_size; ++i) {
776
19
            const char* str_begin =
777
19
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
19
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
19
            const char* left_trim_pos = str_begin;
780
19
            const char* right_trim_pos = str_end;
781
782
19
            if constexpr (is_ltrim) {
783
70
                while (left_trim_pos < str_end) {
784
64
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
13
                        break;
786
13
                    }
787
51
                    ++left_trim_pos;
788
51
                }
789
19
            }
790
791
            if constexpr (is_rtrim) {
792
                while (right_trim_pos > left_trim_pos) {
793
                    --right_trim_pos;
794
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
                        ++right_trim_pos;
796
                        break;
797
                    }
798
                }
799
            }
800
801
19
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
19
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
19
        }
805
806
19
        return Status::OK();
807
19
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
764
25
                                     ColumnString::Offsets& res_offsets) {
765
25
        const size_t offset_size = str_offsets.size();
766
25
        std::bitset<128> char_lookup;
767
25
        const char* remove_begin = remove_str.data;
768
25
        const char* remove_end = remove_str.data + remove_str.size;
769
770
92
        while (remove_begin < remove_end) {
771
67
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
67
            remove_begin += 1;
773
67
        }
774
775
50
        for (size_t i = 0; i < offset_size; ++i) {
776
25
            const char* str_begin =
777
25
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
25
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
25
            const char* left_trim_pos = str_begin;
780
25
            const char* right_trim_pos = str_end;
781
782
            if constexpr (is_ltrim) {
783
                while (left_trim_pos < str_end) {
784
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
                        break;
786
                    }
787
                    ++left_trim_pos;
788
                }
789
            }
790
791
25
            if constexpr (is_rtrim) {
792
75
                while (right_trim_pos > left_trim_pos) {
793
68
                    --right_trim_pos;
794
68
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
18
                        ++right_trim_pos;
796
18
                        break;
797
18
                    }
798
68
                }
799
25
            }
800
801
25
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
25
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
25
        }
805
806
25
        return Status::OK();
807
25
    }
808
809
    static Status impl_vectors_utf8(const ColumnString::Chars& str_data,
810
                                    const ColumnString::Offsets& str_offsets,
811
                                    const StringRef& remove_str, ColumnString::Chars& res_data,
812
53
                                    ColumnString::Offsets& res_offsets) {
813
53
        const size_t offset_size = str_offsets.size();
814
53
        res_offsets.resize(offset_size);
815
53
        res_data.reserve(str_data.size());
816
817
53
        std::unordered_set<std::string_view> char_lookup;
818
53
        const char* remove_begin = remove_str.data;
819
53
        const char* remove_end = remove_str.data + remove_str.size;
820
821
240
        while (remove_begin < remove_end) {
822
187
            size_t byte_len, char_len;
823
187
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
824
187
                    remove_begin, remove_end, 1);
825
187
            char_lookup.insert(std::string_view(remove_begin, byte_len));
826
187
            remove_begin += byte_len;
827
187
        }
828
829
140
        for (size_t i = 0; i < offset_size; ++i) {
830
87
            const char* str_begin =
831
87
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
832
87
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
833
87
            const char* left_trim_pos = str_begin;
834
87
            const char* right_trim_pos = str_end;
835
836
87
            if constexpr (is_ltrim) {
837
81
                while (left_trim_pos < str_end) {
838
73
                    size_t byte_len, char_len;
839
73
                    std::tie(byte_len, char_len) =
840
73
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
841
73
                                                                                   str_end, 1);
842
73
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
843
73
                        char_lookup.end()) {
844
52
                        break;
845
52
                    }
846
21
                    left_trim_pos += byte_len;
847
21
                }
848
60
            }
849
850
87
            if constexpr (is_rtrim) {
851
88
                while (right_trim_pos > left_trim_pos) {
852
80
                    const char* prev_char_pos = right_trim_pos;
853
156
                    do {
854
156
                        --prev_char_pos;
855
156
                    } while ((*prev_char_pos & 0xC0) == 0x80);
856
80
                    size_t byte_len = right_trim_pos - prev_char_pos;
857
80
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
858
80
                        char_lookup.end()) {
859
52
                        break;
860
52
                    }
861
28
                    right_trim_pos = prev_char_pos;
862
28
                }
863
60
            }
864
865
87
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
866
            // The length of the result of the trim function will never exceed the length of the input.
867
87
            res_offsets[i] = (ColumnString::Offset)res_data.size();
868
87
        }
869
53
        return Status::OK();
870
53
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
812
19
                                    ColumnString::Offsets& res_offsets) {
813
19
        const size_t offset_size = str_offsets.size();
814
19
        res_offsets.resize(offset_size);
815
19
        res_data.reserve(str_data.size());
816
817
19
        std::unordered_set<std::string_view> char_lookup;
818
19
        const char* remove_begin = remove_str.data;
819
19
        const char* remove_end = remove_str.data + remove_str.size;
820
821
84
        while (remove_begin < remove_end) {
822
65
            size_t byte_len, char_len;
823
65
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
824
65
                    remove_begin, remove_end, 1);
825
65
            char_lookup.insert(std::string_view(remove_begin, byte_len));
826
65
            remove_begin += byte_len;
827
65
        }
828
829
52
        for (size_t i = 0; i < offset_size; ++i) {
830
33
            const char* str_begin =
831
33
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
832
33
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
833
33
            const char* left_trim_pos = str_begin;
834
33
            const char* right_trim_pos = str_end;
835
836
33
            if constexpr (is_ltrim) {
837
45
                while (left_trim_pos < str_end) {
838
41
                    size_t byte_len, char_len;
839
41
                    std::tie(byte_len, char_len) =
840
41
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
841
41
                                                                                   str_end, 1);
842
41
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
843
41
                        char_lookup.end()) {
844
29
                        break;
845
29
                    }
846
12
                    left_trim_pos += byte_len;
847
12
                }
848
33
            }
849
850
33
            if constexpr (is_rtrim) {
851
48
                while (right_trim_pos > left_trim_pos) {
852
44
                    const char* prev_char_pos = right_trim_pos;
853
90
                    do {
854
90
                        --prev_char_pos;
855
90
                    } while ((*prev_char_pos & 0xC0) == 0x80);
856
44
                    size_t byte_len = right_trim_pos - prev_char_pos;
857
44
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
858
44
                        char_lookup.end()) {
859
29
                        break;
860
29
                    }
861
15
                    right_trim_pos = prev_char_pos;
862
15
                }
863
33
            }
864
865
33
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
866
            // The length of the result of the trim function will never exceed the length of the input.
867
33
            res_offsets[i] = (ColumnString::Offset)res_data.size();
868
33
        }
869
19
        return Status::OK();
870
19
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
812
17
                                    ColumnString::Offsets& res_offsets) {
813
17
        const size_t offset_size = str_offsets.size();
814
17
        res_offsets.resize(offset_size);
815
17
        res_data.reserve(str_data.size());
816
817
17
        std::unordered_set<std::string_view> char_lookup;
818
17
        const char* remove_begin = remove_str.data;
819
17
        const char* remove_end = remove_str.data + remove_str.size;
820
821
78
        while (remove_begin < remove_end) {
822
61
            size_t byte_len, char_len;
823
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
824
61
                    remove_begin, remove_end, 1);
825
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
826
61
            remove_begin += byte_len;
827
61
        }
828
829
44
        for (size_t i = 0; i < offset_size; ++i) {
830
27
            const char* str_begin =
831
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
832
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
833
27
            const char* left_trim_pos = str_begin;
834
27
            const char* right_trim_pos = str_end;
835
836
27
            if constexpr (is_ltrim) {
837
36
                while (left_trim_pos < str_end) {
838
32
                    size_t byte_len, char_len;
839
32
                    std::tie(byte_len, char_len) =
840
32
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
841
32
                                                                                   str_end, 1);
842
32
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
843
32
                        char_lookup.end()) {
844
23
                        break;
845
23
                    }
846
9
                    left_trim_pos += byte_len;
847
9
                }
848
27
            }
849
850
            if constexpr (is_rtrim) {
851
                while (right_trim_pos > left_trim_pos) {
852
                    const char* prev_char_pos = right_trim_pos;
853
                    do {
854
                        --prev_char_pos;
855
                    } while ((*prev_char_pos & 0xC0) == 0x80);
856
                    size_t byte_len = right_trim_pos - prev_char_pos;
857
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
858
                        char_lookup.end()) {
859
                        break;
860
                    }
861
                    right_trim_pos = prev_char_pos;
862
                }
863
            }
864
865
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
866
            // The length of the result of the trim function will never exceed the length of the input.
867
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
868
27
        }
869
17
        return Status::OK();
870
17
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
812
17
                                    ColumnString::Offsets& res_offsets) {
813
17
        const size_t offset_size = str_offsets.size();
814
17
        res_offsets.resize(offset_size);
815
17
        res_data.reserve(str_data.size());
816
817
17
        std::unordered_set<std::string_view> char_lookup;
818
17
        const char* remove_begin = remove_str.data;
819
17
        const char* remove_end = remove_str.data + remove_str.size;
820
821
78
        while (remove_begin < remove_end) {
822
61
            size_t byte_len, char_len;
823
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
824
61
                    remove_begin, remove_end, 1);
825
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
826
61
            remove_begin += byte_len;
827
61
        }
828
829
44
        for (size_t i = 0; i < offset_size; ++i) {
830
27
            const char* str_begin =
831
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
832
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
833
27
            const char* left_trim_pos = str_begin;
834
27
            const char* right_trim_pos = str_end;
835
836
            if constexpr (is_ltrim) {
837
                while (left_trim_pos < str_end) {
838
                    size_t byte_len, char_len;
839
                    std::tie(byte_len, char_len) =
840
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
841
                                                                                   str_end, 1);
842
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
843
                        char_lookup.end()) {
844
                        break;
845
                    }
846
                    left_trim_pos += byte_len;
847
                }
848
            }
849
850
27
            if constexpr (is_rtrim) {
851
40
                while (right_trim_pos > left_trim_pos) {
852
36
                    const char* prev_char_pos = right_trim_pos;
853
66
                    do {
854
66
                        --prev_char_pos;
855
66
                    } while ((*prev_char_pos & 0xC0) == 0x80);
856
36
                    size_t byte_len = right_trim_pos - prev_char_pos;
857
36
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
858
36
                        char_lookup.end()) {
859
23
                        break;
860
23
                    }
861
13
                    right_trim_pos = prev_char_pos;
862
13
                }
863
27
            }
864
865
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
866
            // The length of the result of the trim function will never exceed the length of the input.
867
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
868
27
        }
869
17
        return Status::OK();
870
17
    }
871
};
872
// This is an implementation of a parameter for the Trim function.
873
template <bool is_ltrim, bool is_rtrim, typename Name>
874
struct Trim1Impl {
875
    static constexpr auto name = Name::name;
876
877
145
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
877
41
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
877
31
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
877
37
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
877
9
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
877
13
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
877
14
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
878
879
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
880
91
                          uint32_t result, size_t input_rows_count) {
881
91
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
91
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
91
            auto col_res = ColumnString::create();
884
91
            char blank[] = " ";
885
91
            const StringRef remove_str(blank, 1);
886
91
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
91
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
91
                    col_res->get_offsets())));
889
91
            block.replace_by_position(result, std::move(col_res));
890
91
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
91
        return Status::OK();
896
91
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
32
                          uint32_t result, size_t input_rows_count) {
881
32
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
32
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
32
            auto col_res = ColumnString::create();
884
32
            char blank[] = " ";
885
32
            const StringRef remove_str(blank, 1);
886
32
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
32
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
32
                    col_res->get_offsets())));
889
32
            block.replace_by_position(result, std::move(col_res));
890
32
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
32
        return Status::OK();
896
32
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
21
                          uint32_t result, size_t input_rows_count) {
881
21
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
21
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
21
            auto col_res = ColumnString::create();
884
21
            char blank[] = " ";
885
21
            const StringRef remove_str(blank, 1);
886
21
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
21
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
21
                    col_res->get_offsets())));
889
21
            block.replace_by_position(result, std::move(col_res));
890
21
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
21
        return Status::OK();
896
21
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
26
                          uint32_t result, size_t input_rows_count) {
881
26
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
26
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
26
            auto col_res = ColumnString::create();
884
26
            char blank[] = " ";
885
26
            const StringRef remove_str(blank, 1);
886
26
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
26
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
26
                    col_res->get_offsets())));
889
26
            block.replace_by_position(result, std::move(col_res));
890
26
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
26
        return Status::OK();
896
26
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
1
                          uint32_t result, size_t input_rows_count) {
881
1
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
1
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
1
            auto col_res = ColumnString::create();
884
1
            char blank[] = " ";
885
1
            const StringRef remove_str(blank, 1);
886
1
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
1
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
1
                    col_res->get_offsets())));
889
1
            block.replace_by_position(result, std::move(col_res));
890
1
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
1
        return Status::OK();
896
1
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
5
                          uint32_t result, size_t input_rows_count) {
881
5
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
5
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
5
            auto col_res = ColumnString::create();
884
5
            char blank[] = " ";
885
5
            const StringRef remove_str(blank, 1);
886
5
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
5
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
5
                    col_res->get_offsets())));
889
5
            block.replace_by_position(result, std::move(col_res));
890
5
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
5
        return Status::OK();
896
5
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
6
                          uint32_t result, size_t input_rows_count) {
881
6
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
6
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
6
            auto col_res = ColumnString::create();
884
6
            char blank[] = " ";
885
6
            const StringRef remove_str(blank, 1);
886
6
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
6
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
6
                    col_res->get_offsets())));
889
6
            block.replace_by_position(result, std::move(col_res));
890
6
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
6
        return Status::OK();
896
6
    }
897
};
898
899
// This is an implementation of two parameters for the Trim function.
900
template <bool is_ltrim, bool is_rtrim, typename Name>
901
struct Trim2Impl {
902
    static constexpr auto name = Name::name;
903
904
213
    static DataTypes get_variadic_argument_types() {
905
213
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
213
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
904
15
    static DataTypes get_variadic_argument_types() {
905
15
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
15
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
904
25
    static DataTypes get_variadic_argument_types() {
905
25
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
25
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
904
80
    static DataTypes get_variadic_argument_types() {
905
80
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
80
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
904
27
    static DataTypes get_variadic_argument_types() {
905
27
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
27
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
904
29
    static DataTypes get_variadic_argument_types() {
905
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
904
37
    static DataTypes get_variadic_argument_types() {
905
37
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
37
    }
907
908
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
909
231
                          uint32_t result, size_t input_rows_count) {
910
231
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
231
        const auto& rcol =
912
231
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
231
                        ->get_data_column_ptr();
914
231
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
231
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
231
                auto col_res = ColumnString::create();
917
231
                const auto* remove_str_raw = col_right->get_chars().data();
918
231
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
231
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
231
                if (remove_str.size == 1) {
922
49
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
49
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
49
                            col_res->get_offsets())));
925
182
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
121
                                  std::is_same<Name, NameRTrimIn>::value) {
929
121
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
121
                                col->get_chars(), col->get_offsets(), remove_str,
931
121
                                col_res->get_chars(), col_res->get_offsets())));
932
121
                    } else {
933
61
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
61
                                col->get_chars(), col->get_offsets(), remove_str,
935
61
                                col_res->get_chars(), col_res->get_offsets())));
936
61
                    }
937
182
                }
938
231
                block.replace_by_position(result, std::move(col_res));
939
231
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
231
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
231
        return Status::OK();
951
231
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
7
                          uint32_t result, size_t input_rows_count) {
910
7
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
7
        const auto& rcol =
912
7
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
7
                        ->get_data_column_ptr();
914
7
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
7
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
7
                auto col_res = ColumnString::create();
917
7
                const auto* remove_str_raw = col_right->get_chars().data();
918
7
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
7
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
7
                if (remove_str.size == 1) {
922
2
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
2
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
2
                            col_res->get_offsets())));
925
5
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
                                  std::is_same<Name, NameRTrimIn>::value) {
929
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
                                col->get_chars(), col->get_offsets(), remove_str,
931
                                col_res->get_chars(), col_res->get_offsets())));
932
5
                    } else {
933
5
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
5
                                col->get_chars(), col->get_offsets(), remove_str,
935
5
                                col_res->get_chars(), col_res->get_offsets())));
936
5
                    }
937
5
                }
938
7
                block.replace_by_position(result, std::move(col_res));
939
7
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
7
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
7
        return Status::OK();
951
7
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
16
                          uint32_t result, size_t input_rows_count) {
910
16
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
16
        const auto& rcol =
912
16
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
16
                        ->get_data_column_ptr();
914
16
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
16
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
16
                auto col_res = ColumnString::create();
917
16
                const auto* remove_str_raw = col_right->get_chars().data();
918
16
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
16
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
16
                if (remove_str.size == 1) {
922
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
5
                            col_res->get_offsets())));
925
11
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
                                  std::is_same<Name, NameRTrimIn>::value) {
929
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
                                col->get_chars(), col->get_offsets(), remove_str,
931
                                col_res->get_chars(), col_res->get_offsets())));
932
11
                    } else {
933
11
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
11
                                col->get_chars(), col->get_offsets(), remove_str,
935
11
                                col_res->get_chars(), col_res->get_offsets())));
936
11
                    }
937
11
                }
938
16
                block.replace_by_position(result, std::move(col_res));
939
16
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
16
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
16
        return Status::OK();
951
16
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
69
                          uint32_t result, size_t input_rows_count) {
910
69
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
69
        const auto& rcol =
912
69
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
69
                        ->get_data_column_ptr();
914
69
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
69
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
69
                auto col_res = ColumnString::create();
917
69
                const auto* remove_str_raw = col_right->get_chars().data();
918
69
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
69
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
69
                if (remove_str.size == 1) {
922
24
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
24
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
24
                            col_res->get_offsets())));
925
45
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
                                  std::is_same<Name, NameRTrimIn>::value) {
929
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
                                col->get_chars(), col->get_offsets(), remove_str,
931
                                col_res->get_chars(), col_res->get_offsets())));
932
45
                    } else {
933
45
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
45
                                col->get_chars(), col->get_offsets(), remove_str,
935
45
                                col_res->get_chars(), col_res->get_offsets())));
936
45
                    }
937
45
                }
938
69
                block.replace_by_position(result, std::move(col_res));
939
69
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
69
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
69
        return Status::OK();
951
69
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
50
                          uint32_t result, size_t input_rows_count) {
910
50
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
50
        const auto& rcol =
912
50
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
50
                        ->get_data_column_ptr();
914
50
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
50
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
50
                auto col_res = ColumnString::create();
917
50
                const auto* remove_str_raw = col_right->get_chars().data();
918
50
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
50
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
50
                if (remove_str.size == 1) {
922
7
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
7
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
7
                            col_res->get_offsets())));
925
43
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
43
                                  std::is_same<Name, NameRTrimIn>::value) {
929
43
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
43
                                col->get_chars(), col->get_offsets(), remove_str,
931
43
                                col_res->get_chars(), col_res->get_offsets())));
932
                    } else {
933
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
                                col->get_chars(), col->get_offsets(), remove_str,
935
                                col_res->get_chars(), col_res->get_offsets())));
936
                    }
937
43
                }
938
50
                block.replace_by_position(result, std::move(col_res));
939
50
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
50
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
50
        return Status::OK();
951
50
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
41
                          uint32_t result, size_t input_rows_count) {
910
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
41
        const auto& rcol =
912
41
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
41
                        ->get_data_column_ptr();
914
41
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
41
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
41
                auto col_res = ColumnString::create();
917
41
                const auto* remove_str_raw = col_right->get_chars().data();
918
41
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
41
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
41
                if (remove_str.size == 1) {
922
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
5
                            col_res->get_offsets())));
925
36
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
36
                                  std::is_same<Name, NameRTrimIn>::value) {
929
36
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
36
                                col->get_chars(), col->get_offsets(), remove_str,
931
36
                                col_res->get_chars(), col_res->get_offsets())));
932
                    } else {
933
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
                                col->get_chars(), col->get_offsets(), remove_str,
935
                                col_res->get_chars(), col_res->get_offsets())));
936
                    }
937
36
                }
938
41
                block.replace_by_position(result, std::move(col_res));
939
41
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
41
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
41
        return Status::OK();
951
41
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
48
                          uint32_t result, size_t input_rows_count) {
910
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
48
        const auto& rcol =
912
48
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
48
                        ->get_data_column_ptr();
914
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
48
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
48
                auto col_res = ColumnString::create();
917
48
                const auto* remove_str_raw = col_right->get_chars().data();
918
48
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
48
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
48
                if (remove_str.size == 1) {
922
6
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
6
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
6
                            col_res->get_offsets())));
925
42
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
42
                                  std::is_same<Name, NameRTrimIn>::value) {
929
42
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
42
                                col->get_chars(), col->get_offsets(), remove_str,
931
42
                                col_res->get_chars(), col_res->get_offsets())));
932
                    } else {
933
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
                                col->get_chars(), col->get_offsets(), remove_str,
935
                                col_res->get_chars(), col_res->get_offsets())));
936
                    }
937
42
                }
938
48
                block.replace_by_position(result, std::move(col_res));
939
48
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
48
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
48
        return Status::OK();
951
48
    }
952
};
953
954
template <typename impl>
955
class FunctionTrim : public IFunction {
956
public:
957
    static constexpr auto name = impl::name;
958
370
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
958
42
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
958
32
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
958
38
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
958
16
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
958
26
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
958
81
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
958
10
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
958
14
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
958
15
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
958
28
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
958
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
958
38
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
959
12
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
960
961
262
    size_t get_number_of_arguments() const override {
962
262
        return get_variadic_argument_types_impl().size();
963
262
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
33
    size_t get_number_of_arguments() const override {
962
33
        return get_variadic_argument_types_impl().size();
963
33
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
23
    size_t get_number_of_arguments() const override {
962
23
        return get_variadic_argument_types_impl().size();
963
23
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
29
    size_t get_number_of_arguments() const override {
962
29
        return get_variadic_argument_types_impl().size();
963
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
7
    size_t get_number_of_arguments() const override {
962
7
        return get_variadic_argument_types_impl().size();
963
7
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
17
    size_t get_number_of_arguments() const override {
962
17
        return get_variadic_argument_types_impl().size();
963
17
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
72
    size_t get_number_of_arguments() const override {
962
72
        return get_variadic_argument_types_impl().size();
963
72
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
1
    size_t get_number_of_arguments() const override {
962
1
        return get_variadic_argument_types_impl().size();
963
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
5
    size_t get_number_of_arguments() const override {
962
5
        return get_variadic_argument_types_impl().size();
963
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
6
    size_t get_number_of_arguments() const override {
962
6
        return get_variadic_argument_types_impl().size();
963
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
19
    size_t get_number_of_arguments() const override {
962
19
        return get_variadic_argument_types_impl().size();
963
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
21
    size_t get_number_of_arguments() const override {
962
21
        return get_variadic_argument_types_impl().size();
963
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
29
    size_t get_number_of_arguments() const override {
962
29
        return get_variadic_argument_types_impl().size();
963
29
    }
964
965
262
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
262
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
262
        return arguments[0];
972
262
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
33
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
33
        return arguments[0];
972
33
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
23
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
23
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
23
        return arguments[0];
972
23
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
29
        return arguments[0];
972
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
7
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
7
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
7
        return arguments[0];
972
7
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
17
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
17
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
17
        return arguments[0];
972
17
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
72
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
72
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
72
        return arguments[0];
972
72
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
1
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
1
        return arguments[0];
972
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
5
        return arguments[0];
972
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
6
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
6
        return arguments[0];
972
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
19
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
19
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
19
        return arguments[0];
972
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
21
        return arguments[0];
972
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
29
        return arguments[0];
972
29
    }
973
    // The second parameter of "trim" is a constant.
    // Reports argument index 1 (zero-based, i.e. the second argument) as the only
    // always-constant argument of this function.
974
420
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
61
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
34
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
40
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
7
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
17
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
72
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
1
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
5
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
6
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
67
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
51
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
59
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
975
976
358
    // Returns the variadic argument type list by forwarding to
    // impl::get_variadic_argument_types().
    // NOTE(review): `impl` is declared outside this view; the mangled names in this
    // report suggest it is the Trim1Impl/Trim2Impl policy of the instantiation — confirm.
    DataTypes get_variadic_argument_types_impl() const override {
977
358
        return impl::get_variadic_argument_types();
978
358
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
41
    DataTypes get_variadic_argument_types_impl() const override {
977
41
        return impl::get_variadic_argument_types();
978
41
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
31
    DataTypes get_variadic_argument_types_impl() const override {
977
31
        return impl::get_variadic_argument_types();
978
31
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
37
    DataTypes get_variadic_argument_types_impl() const override {
977
37
        return impl::get_variadic_argument_types();
978
37
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
15
    DataTypes get_variadic_argument_types_impl() const override {
977
15
        return impl::get_variadic_argument_types();
978
15
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
25
    DataTypes get_variadic_argument_types_impl() const override {
977
25
        return impl::get_variadic_argument_types();
978
25
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
80
    DataTypes get_variadic_argument_types_impl() const override {
977
80
        return impl::get_variadic_argument_types();
978
80
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
9
    DataTypes get_variadic_argument_types_impl() const override {
977
9
        return impl::get_variadic_argument_types();
978
9
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
13
    DataTypes get_variadic_argument_types_impl() const override {
977
13
        return impl::get_variadic_argument_types();
978
13
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
14
    DataTypes get_variadic_argument_types_impl() const override {
977
14
        return impl::get_variadic_argument_types();
978
14
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
27
    DataTypes get_variadic_argument_types_impl() const override {
977
27
        return impl::get_variadic_argument_types();
978
27
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
29
    DataTypes get_variadic_argument_types_impl() const override {
977
29
        return impl::get_variadic_argument_types();
978
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
37
    DataTypes get_variadic_argument_types_impl() const override {
977
37
        return impl::get_variadic_argument_types();
978
37
    }
979
980
    // Executes the function by forwarding every argument unchanged to impl::execute()
    // and returning its Status; this wrapper adds no logic of its own.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
981
322
                        uint32_t result, size_t input_rows_count) const override {
982
322
        return impl::execute(context, block, arguments, result, input_rows_count);
983
322
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
32
                        uint32_t result, size_t input_rows_count) const override {
982
32
        return impl::execute(context, block, arguments, result, input_rows_count);
983
32
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
21
                        uint32_t result, size_t input_rows_count) const override {
982
21
        return impl::execute(context, block, arguments, result, input_rows_count);
983
21
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
26
                        uint32_t result, size_t input_rows_count) const override {
982
26
        return impl::execute(context, block, arguments, result, input_rows_count);
983
26
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
7
                        uint32_t result, size_t input_rows_count) const override {
982
7
        return impl::execute(context, block, arguments, result, input_rows_count);
983
7
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
16
                        uint32_t result, size_t input_rows_count) const override {
982
16
        return impl::execute(context, block, arguments, result, input_rows_count);
983
16
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
69
                        uint32_t result, size_t input_rows_count) const override {
982
69
        return impl::execute(context, block, arguments, result, input_rows_count);
983
69
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
1
                        uint32_t result, size_t input_rows_count) const override {
982
1
        return impl::execute(context, block, arguments, result, input_rows_count);
983
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
5
                        uint32_t result, size_t input_rows_count) const override {
982
5
        return impl::execute(context, block, arguments, result, input_rows_count);
983
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
6
                        uint32_t result, size_t input_rows_count) const override {
982
6
        return impl::execute(context, block, arguments, result, input_rows_count);
983
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
50
                        uint32_t result, size_t input_rows_count) const override {
982
50
        return impl::execute(context, block, arguments, result, input_rows_count);
983
50
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
41
                        uint32_t result, size_t input_rows_count) const override {
982
41
        return impl::execute(context, block, arguments, result, input_rows_count);
983
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
48
                        uint32_t result, size_t input_rows_count) const override {
982
48
        return impl::execute(context, block, arguments, result, input_rows_count);
983
48
    }
984
};
985
986
/// Name tag carrying the function name "unhex".
struct UnHexImplEmpty {
    static constexpr const char* name {"unhex"};
};
989
990
/// Name tag carrying the function name "unhex_null".
struct UnHexImplNull {
    static constexpr const char* name {"unhex_null"};
};
993
994
template <typename Name>
995
struct UnHexImpl {
996
    static constexpr auto name = Name::name;
997
    using ReturnType = DataTypeString;
998
    using ColumnType = ColumnString;
999
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1000
1001
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1002
125
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1003
125
        auto rows_count = offsets.size();
1004
125
        dst_offsets.resize(rows_count);
1005
1006
125
        int64_t total_size = 0;
1007
264
        for (size_t i = 0; i < rows_count; i++) {
1008
139
            size_t len = offsets[i] - offsets[i - 1];
1009
139
            total_size += len / 2;
1010
139
        }
1011
125
        ColumnString::check_chars_length(total_size, rows_count);
1012
125
        dst_data.resize(total_size);
1013
125
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1014
125
        size_t offset = 0;
1015
1016
264
        for (int i = 0; i < rows_count; ++i) {
1017
139
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1018
139
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1019
1020
139
            if (UNLIKELY(srclen == 0)) {
1021
12
                dst_offsets[i] = cast_set<uint32_t>(offset);
1022
12
                continue;
1023
12
            }
1024
1025
127
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1026
1027
127
            offset += outlen;
1028
127
            dst_offsets[i] = cast_set<uint32_t>(offset);
1029
127
        }
1030
125
        dst_data.pop_back(total_size - offset);
1031
125
        return Status::OK();
1032
125
    }
1033
1034
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1035
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1036
33
                         ColumnUInt8::Container* null_map_data) {
1037
33
        auto rows_count = offsets.size();
1038
33
        dst_offsets.resize(rows_count);
1039
1040
33
        int64_t total_size = 0;
1041
84
        for (size_t i = 0; i < rows_count; i++) {
1042
51
            size_t len = offsets[i] - offsets[i - 1];
1043
51
            total_size += len / 2;
1044
51
        }
1045
33
        ColumnString::check_chars_length(total_size, rows_count);
1046
33
        dst_data.resize(total_size);
1047
33
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1048
33
        size_t offset = 0;
1049
1050
84
        for (int i = 0; i < rows_count; ++i) {
1051
51
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1052
51
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1053
1054
51
            if (UNLIKELY(srclen == 0)) {
1055
7
                (*null_map_data)[i] = 1;
1056
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1057
7
                continue;
1058
7
            }
1059
1060
44
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1061
1062
44
            if (outlen == 0) {
1063
13
                (*null_map_data)[i] = 1;
1064
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1065
13
                continue;
1066
13
            }
1067
1068
31
            offset += outlen;
1069
31
            dst_offsets[i] = cast_set<uint32_t>(offset);
1070
31
        }
1071
33
        dst_data.pop_back(total_size - offset);
1072
33
        return Status::OK();
1073
33
    }
1074
};
1075
1076
/// Name tag carrying the function name "space".
struct NameStringSpace {
    static constexpr const char* name {"space"};
};
1079
1080
struct StringSpace {
1081
    using ReturnType = DataTypeString;
1082
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_INT;
1083
    using Type = Int32;
1084
    using ReturnColumnType = ColumnString;
1085
1086
    static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data,
1087
8
                         ColumnString::Offsets& res_offsets) {
1088
8
        res_offsets.resize(data.size());
1089
8
        size_t input_size = res_offsets.size();
1090
8
        int64_t total_size = 0;
1091
16
        for (size_t i = 0; i < input_size; ++i) {
1092
8
            if (data[i] > 0) {
1093
4
                total_size += data[i];
1094
4
            }
1095
8
        }
1096
8
        ColumnString::check_chars_length(total_size, input_size);
1097
8
        res_data.reserve(total_size);
1098
1099
16
        for (size_t i = 0; i < input_size; ++i) {
1100
8
            if (data[i] > 0) [[likely]] {
1101
4
                res_data.resize_fill(res_data.size() + data[i], ' ');
1102
4
                cast_set(res_offsets[i], res_data.size());
1103
4
            } else {
1104
4
                StringOP::push_empty_string(i, res_data, res_offsets);
1105
4
            }
1106
8
        }
1107
8
        return Status::OK();
1108
8
    }
1109
};
1110
1111
struct ToBase64Impl {
1112
    static constexpr auto name = "to_base64";
1113
    using ReturnType = DataTypeString;
1114
    using ColumnType = ColumnString;
1115
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1116
1117
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1118
94
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1119
94
        auto rows_count = offsets.size();
1120
94
        dst_offsets.resize(rows_count);
1121
1122
94
        size_t total_size = 0;
1123
199
        for (size_t i = 0; i < rows_count; i++) {
1124
105
            size_t len = offsets[i] - offsets[i - 1];
1125
105
            total_size += 4 * ((len + 2) / 3);
1126
105
        }
1127
94
        ColumnString::check_chars_length(total_size, rows_count);
1128
94
        dst_data.resize(total_size);
1129
94
        auto* dst_data_ptr = dst_data.data();
1130
94
        size_t offset = 0;
1131
1132
199
        for (int i = 0; i < rows_count; ++i) {
1133
105
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1134
105
            size_t srclen = offsets[i] - offsets[i - 1];
1135
1136
105
            if (UNLIKELY(srclen == 0)) {
1137
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1138
7
                continue;
1139
7
            }
1140
1141
98
            auto outlen = doris::base64_encode((const unsigned char*)source, srclen,
1142
98
                                               (unsigned char*)(dst_data_ptr + offset));
1143
1144
98
            offset += outlen;
1145
98
            dst_offsets[i] = cast_set<uint32_t>(offset);
1146
98
        }
1147
94
        dst_data.pop_back(total_size - offset);
1148
94
        return Status::OK();
1149
94
    }
1150
};
1151
1152
struct FromBase64Impl {
1153
    static constexpr auto name = "from_base64";
1154
    using ReturnType = DataTypeString;
1155
    using ColumnType = ColumnString;
1156
1157
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1158
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1159
87
                         NullMap& null_map) {
1160
87
        auto rows_count = offsets.size();
1161
87
        dst_offsets.resize(rows_count);
1162
1163
87
        size_t total_size = 0;
1164
183
        for (size_t i = 0; i < rows_count; i++) {
1165
96
            auto len = offsets[i] - offsets[i - 1];
1166
96
            total_size += len / 4 * 3;
1167
96
        }
1168
87
        ColumnString::check_chars_length(total_size, rows_count);
1169
87
        dst_data.resize(total_size);
1170
87
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1171
87
        size_t offset = 0;
1172
1173
183
        for (int i = 0; i < rows_count; ++i) {
1174
96
            if (UNLIKELY(null_map[i])) {
1175
0
                null_map[i] = 1;
1176
0
                dst_offsets[i] = cast_set<uint32_t>(offset);
1177
0
                continue;
1178
0
            }
1179
1180
96
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1181
96
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1182
1183
96
            if (UNLIKELY(srclen == 0)) {
1184
6
                dst_offsets[i] = cast_set<uint32_t>(offset);
1185
6
                continue;
1186
6
            }
1187
1188
90
            auto outlen = base64_decode(source, srclen, dst_data_ptr + offset);
1189
1190
90
            if (outlen < 0) {
1191
12
                null_map[i] = 1;
1192
12
                dst_offsets[i] = cast_set<uint32_t>(offset);
1193
78
            } else {
1194
78
                offset += outlen;
1195
78
                dst_offsets[i] = cast_set<uint32_t>(offset);
1196
78
            }
1197
90
        }
1198
87
        dst_data.pop_back(total_size - offset);
1199
87
        return Status::OK();
1200
87
    }
1201
};
1202
1203
struct StringAppendTrailingCharIfAbsent {
1204
    static constexpr auto name = "append_trailing_char_if_absent";
1205
    using Chars = ColumnString::Chars;
1206
    using Offsets = ColumnString::Offsets;
1207
    using ReturnType = DataTypeString;
1208
    using ColumnType = ColumnString;
1209
1210
48
    static bool str_end_with(const StringRef& str, const StringRef& end) {
1211
48
        if (str.size < end.size) {
1212
11
            return false;
1213
11
        }
1214
        // The end_with method of StringRef needs to ensure that the size of end is less than or equal to the size of str.
1215
37
        return str.end_with(end);
1216
48
    }
1217
1218
    static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1219
                              const Chars& rdata, const Offsets& roffsets, Chars& res_data,
1220
40
                              Offsets& res_offsets, NullMap& null_map_data) {
1221
40
        DCHECK_EQ(loffsets.size(), roffsets.size());
1222
40
        size_t input_rows_count = loffsets.size();
1223
40
        res_offsets.resize(input_rows_count);
1224
40
        fmt::memory_buffer buffer;
1225
1226
92
        for (size_t i = 0; i < input_rows_count; ++i) {
1227
52
            buffer.clear();
1228
1229
52
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1230
52
                                       loffsets[i] - loffsets[i - 1]);
1231
52
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1232
52
                                       roffsets[i] - roffsets[i - 1]);
1233
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1234
52
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1235
52
                    rstr.begin(), rstr.end(), 2);
1236
1237
52
            if (char_len != 1) {
1238
16
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1239
16
                continue;
1240
16
            }
1241
36
            if (str_end_with(lstr, rstr)) {
1242
9
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1243
9
                continue;
1244
9
            }
1245
1246
27
            buffer.append(lstr.begin(), lstr.end());
1247
27
            buffer.append(rstr.begin(), rstr.end());
1248
27
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1249
27
                                        res_offsets);
1250
27
        }
1251
40
    }
1252
    static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1253
                              const StringRef& rstr, Chars& res_data, Offsets& res_offsets,
1254
8
                              NullMap& null_map_data) {
1255
8
        size_t input_rows_count = loffsets.size();
1256
8
        res_offsets.resize(input_rows_count);
1257
8
        fmt::memory_buffer buffer;
1258
        // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1259
8
        auto [byte_len, char_len] =
1260
8
                simd::VStringFunctions::iterate_utf8_with_limit_length(rstr.begin(), rstr.end(), 2);
1261
8
        if (char_len != 1) {
1262
4
            for (size_t i = 0; i < input_rows_count; ++i) {
1263
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1264
2
            }
1265
2
            return;
1266
2
        }
1267
1268
12
        for (size_t i = 0; i < input_rows_count; ++i) {
1269
6
            buffer.clear();
1270
6
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1271
6
                                       loffsets[i] - loffsets[i - 1]);
1272
1273
6
            if (str_end_with(lstr, rstr)) {
1274
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1275
2
                continue;
1276
2
            }
1277
1278
4
            buffer.append(lstr.begin(), lstr.end());
1279
4
            buffer.append(rstr.begin(), rstr.end());
1280
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1281
4
                                        res_offsets);
1282
4
        }
1283
6
    }
1284
    static void scalar_vector(FunctionContext* context, const StringRef& lstr, const Chars& rdata,
1285
                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
1286
8
                              NullMap& null_map_data) {
1287
8
        size_t input_rows_count = roffsets.size();
1288
8
        res_offsets.resize(input_rows_count);
1289
8
        fmt::memory_buffer buffer;
1290
1291
16
        for (size_t i = 0; i < input_rows_count; ++i) {
1292
8
            buffer.clear();
1293
1294
8
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1295
8
                                       roffsets[i] - roffsets[i - 1]);
1296
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1297
8
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1298
8
                    rstr.begin(), rstr.end(), 2);
1299
1300
8
            if (char_len != 1) {
1301
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1302
2
                continue;
1303
2
            }
1304
6
            if (str_end_with(lstr, rstr)) {
1305
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1306
2
                continue;
1307
2
            }
1308
1309
4
            buffer.append(lstr.begin(), lstr.end());
1310
4
            buffer.append(rstr.begin(), rstr.end());
1311
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1312
4
                                        res_offsets);
1313
4
        }
1314
8
    }
1315
};
1316
1317
/// Tag for the "lpad" function; `is_lpad` is true.
struct StringLPad {
    static constexpr const char* name {"lpad"};
    static constexpr bool is_lpad {true};
};
1321
1322
/// Tag for the "rpad" function; `is_lpad` is false.
struct StringRPad {
    static constexpr const char* name {"rpad"};
    static constexpr bool is_lpad {false};
};
1326
1327
// Alias templates that bind StringFunctionImpl to a specific operation while leaving
// the left/right data types open. Their two-parameter shape is what the
// FunctionBinaryToType aliases in this file take as their implementation argument.

// StringFunctionImpl specialised with StartsWithOp.
template <typename LeftDataType, typename RightDataType>
1328
using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;
1329
1330
// StringFunctionImpl specialised with EndsWithOp.
template <typename LeftDataType, typename RightDataType>
1331
using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;
1332
1333
// StringFunctionImpl specialised with FindInSetOp.
template <typename LeftDataType, typename RightDataType>
1334
using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
1335
1336
// Concrete function types that register_function_string() registers with the factory.
// Each alias pairs a generic function wrapper with an implementation (and, where the
// wrapper takes one, a name tag).

// Unary functions: wrapper FunctionUnaryToType<Impl, Name>.
1337
using FunctionStringParseDataSize = FunctionUnaryToType<ParseDataSize, NameParseDataSize>;
1338
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
1339
using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLength>;
1340
using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
1341
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
1342
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
1343
using FunctionIsValidUTF8 = FunctionUnaryToType<IsValidUTF8Impl, NameIsValidUTF8>;
// Binary (string, string) functions: wrapper FunctionBinaryToType.
1344
using FunctionStringStartsWith =
1345
        FunctionBinaryToType<DataTypeString, DataTypeString, StringStartsWithImpl, NameStartsWith>;
1346
using FunctionStringEndsWith =
1347
        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
1348
using FunctionStringInstr =
1349
        FunctionBinaryToType<DataTypeString, DataTypeString, StringInStrImpl, NameInstr>;
1350
using FunctionStringLocate =
1351
        FunctionBinaryToType<DataTypeString, DataTypeString, StringLocateImpl, NameLocate>;
1352
using FunctionStringFindInSet =
1353
        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;
1354
1355
// String -> string functions: wrapper FunctionStringToString<Impl, Name>.
using FunctionQuote = FunctionStringToString<NameQuoteImpl, NameQuote>;
1356
1357
using FunctionToLower = FunctionStringToString<TransferImpl<NameToLower>, NameToLower>;
1358
1359
using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameToUpper>;
1360
1361
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
1362
1363
// Hex / base64 codecs.
// NOTE(review): the bool passed to FunctionStringEncode presumably selects a nullable
// result (it is true only for the *Nullable variant) -- confirm against its definition.
using FunctionUnHex = FunctionStringEncode<UnHexImpl<UnHexImplEmpty>, false>;
1364
using FunctionUnHexNullable = FunctionStringEncode<UnHexImpl<UnHexImplNull>, true>;
1365
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>;
1366
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
1367
1368
using FunctionStringAppendTrailingCharIfAbsent =
1369
        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;
1370
1371
// lpad / rpad share FunctionStringPad and differ only in the tag struct.
using FunctionStringLPad = FunctionStringPad<StringLPad>;
1372
using FunctionStringRPad = FunctionStringPad<StringRPad>;
1373
1374
// Declarations of the per-category registration routines called first by
// register_function_string(). No definitions appear in this part of the file;
// presumably they live in sibling translation units -- confirm.
extern void register_function_string_basic(SimpleFunctionFactory& factory);
1375
extern void register_function_string_digest(SimpleFunctionFactory& factory);
1376
extern void register_function_string_mask(SimpleFunctionFactory& factory);
1377
extern void register_function_string_misc(SimpleFunctionFactory& factory);
1378
extern void register_function_string_search(SimpleFunctionFactory& factory);
1379
extern void register_function_string_url(SimpleFunctionFactory& factory);
1380
1381
8
// Registers the string functions with `factory`.
//
// Steps, in order:
//   1. delegate to the six per-category register_function_string_* routines;
//   2. register the function types declared in this file (plus a few from included headers);
//   3. register alternative names (aliases) for some of them.
//
// NOTE(review): statement order is kept exactly as written; whether the factory is
// sensitive to registration order is not visible here.
void register_function_string(SimpleFunctionFactory& factory) {
1382
8
    register_function_string_basic(factory);
1383
8
    register_function_string_digest(factory);
1384
8
    register_function_string_mask(factory);
1385
8
    register_function_string_misc(factory);
1386
8
    register_function_string_search(factory);
1387
8
    register_function_string_url(factory);
1388
1389
8
    // Unary and binary string functions, quoting, reversing, hex decoding and case conversion.
    factory.register_function<FunctionStringParseDataSize>();
1390
8
    factory.register_function<FunctionStringASCII>();
1391
8
    factory.register_function<FunctionStringLength>();
1392
8
    factory.register_function<FunctionCrc32>();
1393
8
    factory.register_function<FunctionStringUTF8Length>();
1394
8
    factory.register_function<FunctionStringSpace>();
1395
8
    factory.register_function<FunctionStringStartsWith>();
1396
8
    factory.register_function<FunctionStringEndsWith>();
1397
8
    factory.register_function<FunctionStringInstr>();
1398
8
    factory.register_function<FunctionStringFindInSet>();
1399
8
    factory.register_function<FunctionStringLocate>();
1400
8
    factory.register_function<FunctionQuote>();
1401
8
    factory.register_function<FunctionReverseCommon>();
1402
8
    factory.register_function<FunctionUnHex>();
1403
8
    factory.register_function<FunctionUnHexNullable>();
1404
8
    factory.register_function<FunctionToLower>();
1405
8
    factory.register_function<FunctionToUpper>();
1406
8
    factory.register_function<FunctionToInitcap>();
1407
8
    // Trim family: Trim1Impl and Trim2Impl, each for the Trim/LTrim/RTrim name tags and
    // their *In counterparts.
    // NOTE(review): the two bools presumably enable left and right trimming -- confirm.
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrim>>>();
1408
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrim>>>();
1409
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrim>>>();
1410
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrim>>>();
1411
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrim>>>();
1412
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrim>>>();
1413
8
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrimIn>>>();
1414
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrimIn>>>();
1415
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrimIn>>>();
1416
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrimIn>>>();
1417
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrimIn>>>();
1418
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrimIn>>>();
1419
8
    // Concatenation, element selection, repetition, padding and base64.
    factory.register_function<FunctionStringConcat>();
1420
8
    factory.register_function<FunctionStringElt>();
1421
8
    factory.register_function<FunctionStringConcatWs>();
1422
8
    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
1423
8
    factory.register_function<FunctionStringRepeat>();
1424
8
    factory.register_function<FunctionStringLPad>();
1425
8
    factory.register_function<FunctionStringRPad>();
1426
8
    factory.register_function<FunctionToBase64>();
1427
8
    factory.register_function<FunctionFromBase64>();
1428
8
    // FunctionMoneyFormat: one instantiation per supported numeric input implementation.
    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
1429
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
1430
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
1431
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMALV2>>>();
1432
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL32>>>();
1433
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL64>>>();
1434
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL128I>>>();
1435
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL256>>>();
1436
8
    // FunctionStringFormatRound: the same set of numeric input implementations.
    factory.register_function<FunctionStringFormatRound<FormatRoundDoubleImpl>>();
1437
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt64Impl>>();
1438
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt128Impl>>();
1439
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMALV2>>>();
1440
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL32>>>();
1441
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL64>>>();
1442
8
    factory.register_function<
1443
8
            FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL128I>>>();
1444
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL256>>>();
1445
8
    // Replace / sub-replace / overlay, and UTF-8 validation.
    factory.register_function<FunctionReplace<ReplaceImpl, true>>();
1446
8
    factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
1447
8
    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
1448
8
    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
1449
8
    factory.register_function<FunctionOverlay>();
1450
8
    factory.register_function<FunctionIsValidUTF8>();
1451
1452
8
    // Aliases: each call passes an already-registered function's name and the
    // additional name to expose for it.
    factory.register_alias(FunctionIsValidUTF8::name, "isValidUTF8");
1453
8
    factory.register_alias(FunctionToLower::name, "lcase");
1454
8
    factory.register_alias(FunctionToUpper::name, "ucase");
1455
8
    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
1456
8
    factory.register_alias(FunctionStringLength::name, "octet_length");
1457
8
    factory.register_alias(FunctionOverlay::name, "insert");
1458
8
}
1459
1460
} // namespace doris