Coverage Report

Created: 2026-04-20 08:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <ctype.h>
19
#include <math.h>
20
#include <re2/stringpiece.h>
21
#include <unicode/schriter.h>
22
#include <unicode/uchar.h>
23
#include <unicode/unistr.h>
24
#include <unicode/ustream.h>
25
26
#include <bitset>
27
#include <cstddef>
28
#include <cstdint>
29
#include <string_view>
30
31
#include "common/cast_set.h"
32
#include "common/status.h"
33
#include "core/column/column.h"
34
#include "core/column/column_string.h"
35
#include "core/pod_array_fwd.h"
36
#include "core/string_ref.h"
37
#include "exprs/function/function_reverse.h"
38
#include "exprs/function/function_string_concat.h"
39
#include "exprs/function/function_string_format.h"
40
#include "exprs/function/function_string_replace.h"
41
#include "exprs/function/function_string_to_string.h"
42
#include "exprs/function/function_totype.h"
43
#include "exprs/function/simple_function_factory.h"
44
#include "exprs/function/string_hex_util.h"
45
#include "util/string_search.hpp"
46
#include "util/url_coding.h"
47
#include "util/utf8_check.h"
48
49
namespace doris {
50
struct NameStringASCII {
51
    static constexpr auto name = "ascii";
52
};
53
54
struct StringASCII {
55
    using ReturnType = DataTypeInt32;
56
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
57
    using Type = String;
58
    using ReturnColumnType = ColumnInt32;
59
60
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
61
57
                         PaddedPODArray<Int32>& res) {
62
57
        auto size = offsets.size();
63
57
        res.resize(size);
64
180
        for (int i = 0; i < size; ++i) {
65
123
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
66
123
            res[i] = (offsets[i] == offsets[i - 1]) ? 0 : static_cast<uint8_t>(raw_str[0]);
67
123
        }
68
57
        return Status::OK();
69
57
    }
70
};
71
72
struct NameParseDataSize {
73
    static constexpr auto name = "parse_data_size";
74
};
75
76
static const std::map<std::string_view, Int128> UNITS = {
77
        {"B", static_cast<Int128>(1)},        {"kB", static_cast<Int128>(1) << 10},
78
        {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30},
79
        {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50},
80
        {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70},
81
        {"YB", static_cast<Int128>(1) << 80}};
82
83
struct ParseDataSize {
84
    using ReturnType = DataTypeInt128;
85
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
86
    using Type = String;
87
    using ReturnColumnType = ColumnInt128;
88
89
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
90
48
                         PaddedPODArray<Int128>& res) {
91
48
        auto size = offsets.size();
92
48
        res.resize(size);
93
100
        for (int i = 0; i < size; ++i) {
94
52
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
95
52
            int str_size = offsets[i] - offsets[i - 1];
96
52
            res[i] = parse_data_size(std::string_view(raw_str, str_size));
97
52
        }
98
48
        return Status::OK();
99
48
    }
100
101
52
    static Int128 parse_data_size(const std::string_view& dataSize) {
102
52
        int digit_length = 0;
103
216
        for (char c : dataSize) {
104
216
            if (isdigit(c) || c == '.') {
105
166
                digit_length++;
106
166
            } else {
107
50
                break;
108
50
            }
109
216
        }
110
111
52
        if (digit_length == 0) {
112
4
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
113
4
                                   "Invalid Input argument \"{}\" of function parse_data_size",
114
4
                                   dataSize);
115
4
        }
116
        // 123.45MB--->123.45 : MB
117
48
        double value = 0.0;
118
48
        try {
119
48
            value = std::stod(std::string(dataSize.substr(0, digit_length)));
120
48
        } catch (const std::exception& e) {
121
0
            throw doris::Exception(
122
0
                    ErrorCode::INVALID_ARGUMENT,
123
0
                    "Invalid Input argument \"{}\" of function parse_data_size, error: {}",
124
0
                    dataSize, e.what());
125
0
        }
126
48
        auto unit = dataSize.substr(digit_length);
127
48
        auto it = UNITS.find(unit);
128
48
        if (it != UNITS.end()) {
129
45
            return static_cast<__int128>(static_cast<long double>(it->second) * value);
130
45
        } else {
131
3
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
132
3
                                   "Invalid Input argument \"{}\" of function parse_data_size",
133
3
                                   dataSize);
134
3
        }
135
48
    }
136
};
137
138
struct NameQuote {
139
    static constexpr auto name = "quote";
140
};
141
142
struct NameQuoteImpl {
143
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
144
17
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
145
17
        size_t offset_size = offsets.size();
146
17
        ColumnString::Offset pos = 0;
147
17
        res_offsets.resize(offset_size);
148
17
        res_data.resize(data.size() + offset_size * 2);
149
45
        for (int i = 0; i < offset_size; i++) {
150
28
            const unsigned char* raw_str = &data[offsets[i - 1]];
151
28
            ColumnString::Offset size = offsets[i] - offsets[i - 1];
152
28
            res_data[pos] = '\'';
153
28
            std::memcpy(res_data.data() + pos + 1, raw_str, size);
154
28
            res_data[pos + size + 1] = '\'';
155
28
            pos += size + 2;
156
28
            res_offsets[i] = pos;
157
28
        }
158
17
        return Status::OK();
159
17
    }
160
};
161
162
struct NameStringLength {
163
    static constexpr auto name = "length";
164
};
165
166
struct StringLengthImpl {
167
    using ReturnType = DataTypeInt32;
168
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
169
    using Type = String;
170
    using ReturnColumnType = ColumnInt32;
171
172
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
173
10.1k
                         PaddedPODArray<Int32>& res) {
174
10.1k
        auto size = offsets.size();
175
10.1k
        res.resize(size);
176
4.54M
        for (int i = 0; i < size; ++i) {
177
4.53M
            int str_size = offsets[i] - offsets[i - 1];
178
4.53M
            res[i] = str_size;
179
4.53M
        }
180
10.1k
        return Status::OK();
181
10.1k
    }
182
};
183
184
struct NameCrc32 {
185
    static constexpr auto name = "crc32";
186
};
187
188
struct Crc32Impl {
189
    using ReturnType = DataTypeInt64;
190
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
191
    using Type = String;
192
    using ReturnColumnType = ColumnInt64;
193
194
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
195
3
                         PaddedPODArray<Int64>& res) {
196
3
        auto size = offsets.size();
197
3
        res.resize(size);
198
6
        for (int i = 0; i < size; ++i) {
199
3
            res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1],
200
3
                             offsets[i] - offsets[i - 1]);
201
3
        }
202
3
        return Status::OK();
203
3
    }
204
};
205
206
struct NameStringUtf8Length {
207
    static constexpr auto name = "char_length";
208
};
209
210
struct StringUtf8LengthImpl {
211
    using ReturnType = DataTypeInt32;
212
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
213
    using Type = String;
214
    using ReturnColumnType = ColumnInt32;
215
216
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
217
56
                         PaddedPODArray<Int32>& res) {
218
56
        auto size = offsets.size();
219
56
        res.resize(size);
220
186
        for (int i = 0; i < size; ++i) {
221
130
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
222
130
            int str_size = offsets[i] - offsets[i - 1];
223
130
            res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size);
224
130
        }
225
56
        return Status::OK();
226
56
    }
227
};
228
229
struct NameIsValidUTF8 {
230
    static constexpr auto name = "is_valid_utf8";
231
};
232
233
struct IsValidUTF8Impl {
234
    using ReturnType = DataTypeUInt8;
235
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
236
    using Type = String;
237
    using ReturnColumnType = ColumnUInt8;
238
239
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
240
39
                         PaddedPODArray<UInt8>& res) {
241
39
        auto size = offsets.size();
242
39
        res.resize(size);
243
98
        for (size_t i = 0; i < size; ++i) {
244
59
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
245
59
            size_t str_size = offsets[i] - offsets[i - 1];
246
59
            res[i] = validate_utf8(raw_str, str_size) ? 1 : 0;
247
59
        }
248
39
        return Status::OK();
249
39
    }
250
};
251
252
struct NameStartsWith {
253
    static constexpr auto name = "starts_with";
254
};
255
256
struct StartsWithOp {
257
    using ResultDataType = DataTypeUInt8;
258
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
259
260
165
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
261
165
        res = strl.starts_with(strr);
262
165
    }
263
};
264
265
struct NameEndsWith {
266
    static constexpr auto name = "ends_with";
267
};
268
269
struct EndsWithOp {
270
    using ResultDataType = DataTypeUInt8;
271
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
272
273
166
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
274
166
        res = strl.ends_with(strr);
275
166
    }
276
};
277
278
struct NameFindInSet {
279
    static constexpr auto name = "find_in_set";
280
};
281
282
struct FindInSetOp {
283
    using ResultDataType = DataTypeInt32;
284
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
285
190
    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
286
690
        for (const auto& c : strl) {
287
690
            if (c == ',') {
288
21
                res = 0;
289
21
                return;
290
21
            }
291
690
        }
292
293
169
        int32_t token_index = 1;
294
169
        int32_t start = 0;
295
169
        int32_t end;
296
297
357
        do {
298
357
            end = start;
299
            // Position end.
300
1.26k
            while (end < strr.length() && strr[end] != ',') {
301
910
                ++end;
302
910
            }
303
304
357
            if (strl == std::string_view {strr.data() + start, (size_t)end - start}) {
305
109
                res = token_index;
306
109
                return;
307
109
            }
308
309
            // Re-position start and end past ','
310
248
            start = end + 1;
311
248
            ++token_index;
312
248
        } while (start < strr.length());
313
60
        res = 0;
314
60
    }
315
};
316
317
struct NameInstr {
318
    static constexpr auto name = "instr";
319
};
320
321
// LeftDataType and RightDataType are DataTypeString
322
template <typename LeftDataType, typename RightDataType>
323
struct StringInStrImpl {
324
    using ResultDataType = DataTypeInt32;
325
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
326
327
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
328
72
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
329
72
        StringRef lstr_ref(ldata.data, ldata.size);
330
331
72
        auto size = roffsets.size();
332
72
        res.resize(size);
333
144
        for (int i = 0; i < size; ++i) {
334
72
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
335
72
            int r_str_size = roffsets[i] - roffsets[i - 1];
336
337
72
            StringRef rstr_ref(r_raw_str, r_str_size);
338
339
72
            res[i] = execute(lstr_ref, rstr_ref);
340
72
        }
341
342
72
        return Status::OK();
343
72
    }
344
345
    static Status vector_scalar(const ColumnString::Chars& ldata,
346
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
347
116
                                ResultPaddedPODArray& res) {
348
116
        auto size = loffsets.size();
349
116
        res.resize(size);
350
351
116
        if (rdata.size == 0) {
352
12
            std::fill(res.begin(), res.end(), 1);
353
12
            return Status::OK();
354
12
        }
355
356
104
        const UInt8* begin = ldata.data();
357
104
        const UInt8* end = begin + ldata.size();
358
104
        const UInt8* pos = begin;
359
360
        /// Current index in the array of strings.
361
104
        size_t i = 0;
362
104
        std::fill(res.begin(), res.end(), 0);
363
364
104
        StringRef rstr_ref(rdata.data, rdata.size);
365
104
        StringSearch search(&rstr_ref);
366
367
146
        while (pos < end) {
368
            // search return matched substring start offset
369
118
            pos = (UInt8*)search.search((char*)pos, end - pos);
370
118
            if (pos >= end) {
371
76
                break;
372
76
            }
373
374
            /// Determine which index it refers to.
375
            /// begin + value_offsets[i] is the start offset of string at i+1
376
48
            while (begin + loffsets[i] < pos) {
377
6
                ++i;
378
6
            }
379
380
            /// We check that the entry does not pass through the boundaries of strings.
381
42
            if (pos + rdata.size <= begin + loffsets[i]) {
382
40
                int loc = (int)(pos - begin) - loffsets[i - 1];
383
40
                int l_str_size = loffsets[i] - loffsets[i - 1];
384
40
                auto len = std::min(l_str_size, loc);
385
40
                loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len);
386
40
                res[i] = loc + 1;
387
40
            }
388
389
            // move to next string offset
390
42
            pos = begin + loffsets[i];
391
42
            ++i;
392
42
        }
393
394
104
        return Status::OK();
395
116
    }
396
397
    static Status vector_vector(const ColumnString::Chars& ldata,
398
                                const ColumnString::Offsets& loffsets,
399
                                const ColumnString::Chars& rdata,
400
207
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
401
207
        DCHECK_EQ(loffsets.size(), roffsets.size());
402
403
207
        auto size = loffsets.size();
404
207
        res.resize(size);
405
661
        for (int i = 0; i < size; ++i) {
406
454
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
407
454
            int l_str_size = loffsets[i] - loffsets[i - 1];
408
454
            StringRef lstr_ref(l_raw_str, l_str_size);
409
410
454
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
411
454
            int r_str_size = roffsets[i] - roffsets[i - 1];
412
454
            StringRef rstr_ref(r_raw_str, r_str_size);
413
414
454
            res[i] = execute(lstr_ref, rstr_ref);
415
454
        }
416
417
207
        return Status::OK();
418
207
    }
419
420
526
    static int execute(const StringRef& strl, const StringRef& strr) {
421
526
        if (strr.size == 0) {
422
71
            return 1;
423
71
        }
424
425
455
        StringSearch search(&strr);
426
        // Hive returns positions starting from 1.
427
455
        int loc = search.search(&strl);
428
455
        if (loc > 0) {
429
43
            int len = std::min(loc, (int)strl.size);
430
43
            loc = simd::VStringFunctions::get_char_len(strl.data, len);
431
43
        }
432
433
455
        return loc + 1;
434
526
    }
435
};
436
437
// the same impl as instr
438
struct NameLocate {
439
    static constexpr auto name = "locate";
440
};
441
442
// LeftDataType and RightDataType are DataTypeString
443
template <typename LeftDataType, typename RightDataType>
444
struct StringLocateImpl {
445
    using ResultDataType = DataTypeInt32;
446
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
447
448
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
449
40
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
450
40
        return StringInStrImpl<LeftDataType, RightDataType>::vector_scalar(rdata, roffsets, ldata,
451
40
                                                                           res);
452
40
    }
453
454
    static Status vector_scalar(const ColumnString::Chars& ldata,
455
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
456
36
                                ResultPaddedPODArray& res) {
457
36
        return StringInStrImpl<LeftDataType, RightDataType>::scalar_vector(rdata, ldata, loffsets,
458
36
                                                                           res);
459
36
    }
460
461
    static Status vector_vector(const ColumnString::Chars& ldata,
462
                                const ColumnString::Offsets& loffsets,
463
                                const ColumnString::Chars& rdata,
464
126
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
465
126
        return StringInStrImpl<LeftDataType, RightDataType>::vector_vector(rdata, roffsets, ldata,
466
126
                                                                           loffsets, res);
467
126
    }
468
};
469
470
// LeftDataType and RightDataType are DataTypeString
471
template <typename LeftDataType, typename RightDataType, typename OP>
472
struct StringFunctionImpl {
473
    using ResultDataType = typename OP::ResultDataType;
474
    using ResultPaddedPODArray = typename OP::ResultPaddedPODArray;
475
476
    static Status vector_vector(const ColumnString::Chars& ldata,
477
                                const ColumnString::Offsets& loffsets,
478
                                const ColumnString::Chars& rdata,
479
213
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
213
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
213
        auto size = loffsets.size();
483
213
        res.resize(size);
484
576
        for (int i = 0; i < size; ++i) {
485
363
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
363
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
363
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
363
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
363
            std::string_view lview(l_raw_str, l_str_size);
492
363
            std::string_view rview(r_raw_str, r_str_size);
493
494
363
            OP::execute(lview, rview, res[i]);
495
363
        }
496
213
        return Status::OK();
497
213
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
479
88
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
88
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
88
        auto size = loffsets.size();
483
88
        res.resize(size);
484
215
        for (int i = 0; i < size; ++i) {
485
127
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
127
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
127
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
127
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
127
            std::string_view lview(l_raw_str, l_str_size);
492
127
            std::string_view rview(r_raw_str, r_str_size);
493
494
127
            OP::execute(lview, rview, res[i]);
495
127
        }
496
88
        return Status::OK();
497
88
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
479
61
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
61
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
61
        auto size = loffsets.size();
483
61
        res.resize(size);
484
175
        for (int i = 0; i < size; ++i) {
485
114
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
114
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
114
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
114
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
114
            std::string_view lview(l_raw_str, l_str_size);
492
114
            std::string_view rview(r_raw_str, r_str_size);
493
494
114
            OP::execute(lview, rview, res[i]);
495
114
        }
496
61
        return Status::OK();
497
61
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
479
64
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
64
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
64
        auto size = loffsets.size();
483
64
        res.resize(size);
484
186
        for (int i = 0; i < size; ++i) {
485
122
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
122
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
122
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
122
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
122
            std::string_view lview(l_raw_str, l_str_size);
492
122
            std::string_view rview(r_raw_str, r_str_size);
493
494
122
            OP::execute(lview, rview, res[i]);
495
122
        }
496
64
        return Status::OK();
497
64
    }
498
    static Status vector_scalar(const ColumnString::Chars& ldata,
499
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
500
40
                                ResultPaddedPODArray& res) {
501
40
        auto size = loffsets.size();
502
40
        res.resize(size);
503
40
        std::string_view rview(rdata.data, rdata.size);
504
148
        for (int i = 0; i < size; ++i) {
505
108
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
108
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
108
            std::string_view lview(l_raw_str, l_str_size);
508
509
108
            OP::execute(lview, rview, res[i]);
510
108
        }
511
40
        return Status::OK();
512
40
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
500
6
                                ResultPaddedPODArray& res) {
501
6
        auto size = loffsets.size();
502
6
        res.resize(size);
503
6
        std::string_view rview(rdata.data, rdata.size);
504
40
        for (int i = 0; i < size; ++i) {
505
34
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
34
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
34
            std::string_view lview(l_raw_str, l_str_size);
508
509
34
            OP::execute(lview, rview, res[i]);
510
34
        }
511
6
        return Status::OK();
512
6
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
500
16
                                ResultPaddedPODArray& res) {
501
16
        auto size = loffsets.size();
502
16
        res.resize(size);
503
16
        std::string_view rview(rdata.data, rdata.size);
504
54
        for (int i = 0; i < size; ++i) {
505
38
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
38
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
38
            std::string_view lview(l_raw_str, l_str_size);
508
509
38
            OP::execute(lview, rview, res[i]);
510
38
        }
511
16
        return Status::OK();
512
16
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
500
18
                                ResultPaddedPODArray& res) {
501
18
        auto size = loffsets.size();
502
18
        res.resize(size);
503
18
        std::string_view rview(rdata.data, rdata.size);
504
54
        for (int i = 0; i < size; ++i) {
505
36
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
36
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
36
            std::string_view lview(l_raw_str, l_str_size);
508
509
36
            OP::execute(lview, rview, res[i]);
510
36
        }
511
18
        return Status::OK();
512
18
    }
513
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
514
44
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
44
        auto size = roffsets.size();
516
44
        res.resize(size);
517
44
        std::string_view lview(ldata.data, ldata.size);
518
94
        for (int i = 0; i < size; ++i) {
519
50
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
50
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
50
            std::string_view rview(r_raw_str, r_str_size);
522
523
50
            OP::execute(lview, rview, res[i]);
524
50
        }
525
44
        return Status::OK();
526
44
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
514
4
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
4
        auto size = roffsets.size();
516
4
        res.resize(size);
517
4
        std::string_view lview(ldata.data, ldata.size);
518
8
        for (int i = 0; i < size; ++i) {
519
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
4
            std::string_view rview(r_raw_str, r_str_size);
522
523
4
            OP::execute(lview, rview, res[i]);
524
4
        }
525
4
        return Status::OK();
526
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
514
14
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
14
        auto size = roffsets.size();
516
14
        res.resize(size);
517
14
        std::string_view lview(ldata.data, ldata.size);
518
28
        for (int i = 0; i < size; ++i) {
519
14
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
14
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
14
            std::string_view rview(r_raw_str, r_str_size);
522
523
14
            OP::execute(lview, rview, res[i]);
524
14
        }
525
14
        return Status::OK();
526
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERNS7_IiLm4096ESA_Lm16ELm15EEE
Line
Count
Source
514
26
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
26
        auto size = roffsets.size();
516
26
        res.resize(size);
517
26
        std::string_view lview(ldata.data, ldata.size);
518
58
        for (int i = 0; i < size; ++i) {
519
32
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
32
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
32
            std::string_view rview(r_raw_str, r_str_size);
522
523
32
            OP::execute(lview, rview, res[i]);
524
32
        }
525
26
        return Status::OK();
526
26
    }
527
};
528
529
struct NameToLower {
530
    static constexpr auto name = "lower";
531
};
532
533
struct NameToUpper {
534
    static constexpr auto name = "upper";
535
};
536
537
template <typename OpName>
538
struct TransferImpl {
539
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
540
437
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
541
437
        size_t offset_size = offsets.size();
542
437
        if (UNLIKELY(!offset_size)) {
543
0
            return Status::OK();
544
0
        }
545
546
437
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
547
437
        res_offsets.resize(offset_size);
548
437
        if (is_ascii) {
549
377
            memcpy_small_allow_read_write_overflow15(
550
377
                    res_offsets.data(), offsets.data(),
551
377
                    offset_size * sizeof(ColumnString::Offsets::value_type));
552
553
377
            size_t data_length = data.size();
554
377
            res_data.resize(data_length);
555
377
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
556
85
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
557
292
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
558
292
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
559
292
            }
560
377
        } else {
561
60
            execute_utf8(data, offsets, res_data, res_offsets);
562
60
        }
563
564
437
        return Status::OK();
565
437
    }
_ZN5doris12TransferImplINS_11NameToLowerEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
540
313
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
541
313
        size_t offset_size = offsets.size();
542
313
        if (UNLIKELY(!offset_size)) {
543
0
            return Status::OK();
544
0
        }
545
546
313
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
547
313
        res_offsets.resize(offset_size);
548
313
        if (is_ascii) {
549
292
            memcpy_small_allow_read_write_overflow15(
550
292
                    res_offsets.data(), offsets.data(),
551
292
                    offset_size * sizeof(ColumnString::Offsets::value_type));
552
553
292
            size_t data_length = data.size();
554
292
            res_data.resize(data_length);
555
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
556
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
557
292
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
558
292
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
559
292
            }
560
292
        } else {
561
21
            execute_utf8(data, offsets, res_data, res_offsets);
562
21
        }
563
564
313
        return Status::OK();
565
313
    }
_ZN5doris12TransferImplINS_11NameToUpperEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
540
124
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
541
124
        size_t offset_size = offsets.size();
542
124
        if (UNLIKELY(!offset_size)) {
543
0
            return Status::OK();
544
0
        }
545
546
124
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
547
124
        res_offsets.resize(offset_size);
548
124
        if (is_ascii) {
549
85
            memcpy_small_allow_read_write_overflow15(
550
85
                    res_offsets.data(), offsets.data(),
551
85
                    offset_size * sizeof(ColumnString::Offsets::value_type));
552
553
85
            size_t data_length = data.size();
554
85
            res_data.resize(data_length);
555
85
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
556
85
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
557
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
558
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
559
            }
560
85
        } else {
561
39
            execute_utf8(data, offsets, res_data, res_offsets);
562
39
        }
563
564
124
        return Status::OK();
565
124
    }
566
567
    static void execute_utf8(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
568
60
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
569
60
        std::string result;
570
198
        for (int64_t i = 0; i < offsets.size(); ++i) {
571
138
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
572
138
            uint32_t size = offsets[i] - offsets[i - 1];
573
574
138
            result.clear();
575
138
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
576
91
                to_upper_utf8(begin, size, result);
577
91
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
578
47
                to_lower_utf8(begin, size, result);
579
47
            }
580
138
            StringOP::push_value_string(result, i, res_data, res_offsets);
581
138
        }
582
60
    }
_ZN5doris12TransferImplINS_11NameToLowerEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
568
21
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
569
21
        std::string result;
570
68
        for (int64_t i = 0; i < offsets.size(); ++i) {
571
47
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
572
47
            uint32_t size = offsets[i] - offsets[i - 1];
573
574
47
            result.clear();
575
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
576
                to_upper_utf8(begin, size, result);
577
47
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
578
47
                to_lower_utf8(begin, size, result);
579
47
            }
580
47
            StringOP::push_value_string(result, i, res_data, res_offsets);
581
47
        }
582
21
    }
_ZN5doris12TransferImplINS_11NameToUpperEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
568
39
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
569
39
        std::string result;
570
130
        for (int64_t i = 0; i < offsets.size(); ++i) {
571
91
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
572
91
            uint32_t size = offsets[i] - offsets[i - 1];
573
574
91
            result.clear();
575
91
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
576
91
                to_upper_utf8(begin, size, result);
577
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
578
                to_lower_utf8(begin, size, result);
579
            }
580
91
            StringOP::push_value_string(result, i, res_data, res_offsets);
581
91
        }
582
39
    }
583
584
91
    static void to_upper_utf8(const char* data, uint32_t size, std::string& result) {
585
91
        icu::StringPiece sp;
586
91
        sp.set(data, size);
587
91
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
588
91
        unicode_str.toUpper();
589
91
        unicode_str.toUTF8String(result);
590
91
    }
591
592
47
    static void to_lower_utf8(const char* data, uint32_t size, std::string& result) {
593
47
        icu::StringPiece sp;
594
47
        sp.set(data, size);
595
47
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
596
47
        unicode_str.toLower();
597
47
        unicode_str.toUTF8String(result);
598
47
    }
599
};
600
601
// Capitalize first letter
602
struct NameToInitcap {
603
    static constexpr auto name = "initcap";
604
};
605
606
struct InitcapImpl {
607
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
608
172
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
609
172
        res_offsets.resize(offsets.size());
610
611
172
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
612
172
        if (is_ascii) {
613
114
            impl_vectors_ascii(data, offsets, res_data, res_offsets);
614
114
        } else {
615
58
            impl_vectors_utf8(data, offsets, res_data, res_offsets);
616
58
        }
617
172
        return Status::OK();
618
172
    }
619
620
    static void impl_vectors_ascii(const ColumnString::Chars& data,
621
                                   const ColumnString::Offsets& offsets,
622
                                   ColumnString::Chars& res_data,
623
114
                                   ColumnString::Offsets& res_offsets) {
624
114
        size_t offset_size = offsets.size();
625
114
        memcpy_small_allow_read_write_overflow15(
626
114
                res_offsets.data(), offsets.data(),
627
114
                offset_size * sizeof(ColumnString::Offsets::value_type));
628
629
114
        size_t data_length = data.size();
630
114
        res_data.resize(data_length);
631
114
        simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
632
633
114
        bool need_capitalize = true;
634
246
        for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) {
635
132
            auto end_index = res_offsets[offset_index];
636
132
            need_capitalize = true;
637
638
1.56k
            for (size_t i = start_index; i < end_index; ++i) {
639
1.43k
                if (!::isalnum(res_data[i])) {
640
216
                    need_capitalize = true;
641
1.21k
                } else if (need_capitalize) {
642
                    /*
643
                    https://en.cppreference.com/w/cpp/string/byte/toupper
644
                    Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. 
645
                    To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char:
646
                    char my_toupper(char ch)
647
                    {
648
                        return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
649
                    }
650
                    */
651
267
                    res_data[i] = static_cast<unsigned char>(::toupper(res_data[i]));
652
267
                    need_capitalize = false;
653
267
                }
654
1.43k
            }
655
656
132
            start_index = end_index;
657
132
        }
658
114
    }
659
660
    static void impl_vectors_utf8(const ColumnString::Chars& data,
661
                                  const ColumnString::Offsets& offsets,
662
                                  ColumnString::Chars& res_data,
663
58
                                  ColumnString::Offsets& res_offsets) {
664
58
        std::string result;
665
123
        for (int64_t i = 0; i < offsets.size(); ++i) {
666
65
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
667
65
            uint32_t size = offsets[i] - offsets[i - 1];
668
65
            result.clear();
669
65
            to_initcap_utf8(begin, size, result);
670
65
            StringOP::push_value_string(result, i, res_data, res_offsets);
671
65
        }
672
58
    }
673
674
65
    static void to_initcap_utf8(const char* data, uint32_t size, std::string& result) {
675
65
        icu::StringPiece sp;
676
65
        sp.set(data, size);
677
65
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
678
65
        unicode_str.toLower();
679
65
        icu::UnicodeString output_str;
680
65
        bool need_capitalize = true;
681
65
        icu::StringCharacterIterator iter(unicode_str);
682
647
        for (UChar32 ch = iter.first32(); ch != icu::CharacterIterator::DONE; ch = iter.next32()) {
683
582
            if (!u_isalnum(ch)) {
684
105
                need_capitalize = true;
685
477
            } else if (need_capitalize) {
686
87
                ch = u_toupper(ch);
687
87
                need_capitalize = false;
688
87
            }
689
582
            output_str.append(ch);
690
582
        }
691
65
        output_str.toUTF8String(result);
692
65
    }
693
};
694
695
struct NameTrim {
696
    static constexpr auto name = "trim";
697
};
698
struct NameLTrim {
699
    static constexpr auto name = "ltrim";
700
};
701
struct NameRTrim {
702
    static constexpr auto name = "rtrim";
703
};
704
struct NameTrimIn {
705
    static constexpr auto name = "trim_in";
706
};
707
struct NameLTrimIn {
708
    static constexpr auto name = "ltrim_in";
709
};
710
struct NameRTrimIn {
711
    static constexpr auto name = "rtrim_in";
712
};
713
template <bool is_ltrim, bool is_rtrim, bool trim_single>
714
struct TrimUtil {
715
    static Status vector(const ColumnString::Chars& str_data,
716
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
717
302
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
302
        const size_t offset_size = str_offsets.size();
719
302
        res_offsets.resize(offset_size);
720
302
        res_data.reserve(str_data.size());
721
874
        for (size_t i = 0; i < offset_size; ++i) {
722
572
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
572
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
572
            if constexpr (is_ltrim) {
726
355
                str_begin =
727
355
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
355
            }
729
572
            if constexpr (is_rtrim) {
730
395
                str_end =
731
395
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
395
            }
733
734
572
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
572
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
572
        }
738
302
        return Status::OK();
739
302
    }
_ZN5doris8TrimUtilILb1ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
58
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
58
        const size_t offset_size = str_offsets.size();
719
58
        res_offsets.resize(offset_size);
720
58
        res_data.reserve(str_data.size());
721
178
        for (size_t i = 0; i < offset_size; ++i) {
722
120
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
120
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
120
            if constexpr (is_ltrim) {
726
120
                str_begin =
727
120
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
120
            }
729
120
            if constexpr (is_rtrim) {
730
120
                str_end =
731
120
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
120
            }
733
734
120
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
120
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
120
        }
738
58
        return Status::OK();
739
58
    }
_ZN5doris8TrimUtilILb1ELb0ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
54
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
54
        const size_t offset_size = str_offsets.size();
719
54
        res_offsets.resize(offset_size);
720
54
        res_data.reserve(str_data.size());
721
170
        for (size_t i = 0; i < offset_size; ++i) {
722
116
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
116
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
116
            if constexpr (is_ltrim) {
726
116
                str_begin =
727
116
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
116
            }
729
            if constexpr (is_rtrim) {
730
                str_end =
731
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
            }
733
734
116
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
116
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
116
        }
738
54
        return Status::OK();
739
54
    }
_ZN5doris8TrimUtilILb0ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
94
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
94
        const size_t offset_size = str_offsets.size();
719
94
        res_offsets.resize(offset_size);
720
94
        res_data.reserve(str_data.size());
721
266
        for (size_t i = 0; i < offset_size; ++i) {
722
172
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
172
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
            if constexpr (is_ltrim) {
726
                str_begin =
727
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
            }
729
172
            if constexpr (is_rtrim) {
730
172
                str_end =
731
172
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
172
            }
733
734
172
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
172
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
172
        }
738
94
        return Status::OK();
739
94
    }
_ZN5doris8TrimUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
24
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
24
        const size_t offset_size = str_offsets.size();
719
24
        res_offsets.resize(offset_size);
720
24
        res_data.reserve(str_data.size());
721
82
        for (size_t i = 0; i < offset_size; ++i) {
722
58
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
58
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
58
            if constexpr (is_ltrim) {
726
58
                str_begin =
727
58
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
58
            }
729
58
            if constexpr (is_rtrim) {
730
58
                str_end =
731
58
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
58
            }
733
734
58
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
58
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
58
        }
738
24
        return Status::OK();
739
24
    }
_ZN5doris8TrimUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
27
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
27
        const size_t offset_size = str_offsets.size();
719
27
        res_offsets.resize(offset_size);
720
27
        res_data.reserve(str_data.size());
721
88
        for (size_t i = 0; i < offset_size; ++i) {
722
61
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
61
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
61
            if constexpr (is_ltrim) {
726
61
                str_begin =
727
61
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
61
            }
729
            if constexpr (is_rtrim) {
730
                str_end =
731
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
            }
733
734
61
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
61
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
61
        }
738
27
        return Status::OK();
739
27
    }
_ZN5doris8TrimUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
45
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
45
        const size_t offset_size = str_offsets.size();
719
45
        res_offsets.resize(offset_size);
720
45
        res_data.reserve(str_data.size());
721
90
        for (size_t i = 0; i < offset_size; ++i) {
722
45
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
45
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
            if constexpr (is_ltrim) {
726
                str_begin =
727
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
            }
729
45
            if constexpr (is_rtrim) {
730
45
                str_end =
731
45
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
45
            }
733
734
45
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
45
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
45
        }
738
45
        return Status::OK();
739
45
    }
740
};
741
template <bool is_ltrim, bool is_rtrim, bool trim_single>
742
struct TrimInUtil {
743
    static Status vector(const ColumnString::Chars& str_data,
744
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
745
121
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
121
        const size_t offset_size = str_offsets.size();
747
121
        res_offsets.resize(offset_size);
748
121
        res_data.reserve(str_data.size());
749
121
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
121
                         simd::VStringFunctions::is_ascii(StringRef(
751
76
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
121
        if (all_ascii) {
754
68
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
68
        } else {
756
53
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
53
        }
758
121
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
745
43
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
43
        const size_t offset_size = str_offsets.size();
747
43
        res_offsets.resize(offset_size);
748
43
        res_data.reserve(str_data.size());
749
43
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
43
                         simd::VStringFunctions::is_ascii(StringRef(
751
28
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
43
        if (all_ascii) {
754
24
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
24
        } else {
756
19
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
19
        }
758
43
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
745
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
36
        const size_t offset_size = str_offsets.size();
747
36
        res_offsets.resize(offset_size);
748
36
        res_data.reserve(str_data.size());
749
36
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
36
                         simd::VStringFunctions::is_ascii(StringRef(
751
21
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
36
        if (all_ascii) {
754
19
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
19
        } else {
756
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
17
        }
758
36
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
745
42
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
42
        const size_t offset_size = str_offsets.size();
747
42
        res_offsets.resize(offset_size);
748
42
        res_data.reserve(str_data.size());
749
42
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
42
                         simd::VStringFunctions::is_ascii(StringRef(
751
27
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
42
        if (all_ascii) {
754
25
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
25
        } else {
756
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
17
        }
758
42
    }
759
760
private:
761
    static Status impl_vectors_ascii(const ColumnString::Chars& str_data,
762
                                     const ColumnString::Offsets& str_offsets,
763
                                     const StringRef& remove_str, ColumnString::Chars& res_data,
764
68
                                     ColumnString::Offsets& res_offsets) {
765
68
        const size_t offset_size = str_offsets.size();
766
68
        std::bitset<128> char_lookup;
767
68
        const char* remove_begin = remove_str.data;
768
68
        const char* remove_end = remove_str.data + remove_str.size;
769
770
251
        while (remove_begin < remove_end) {
771
183
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
183
            remove_begin += 1;
773
183
        }
774
775
136
        for (size_t i = 0; i < offset_size; ++i) {
776
68
            const char* str_begin =
777
68
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
68
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
68
            const char* left_trim_pos = str_begin;
780
68
            const char* right_trim_pos = str_end;
781
782
68
            if constexpr (is_ltrim) {
783
127
                while (left_trim_pos < str_end) {
784
114
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
30
                        break;
786
30
                    }
787
84
                    ++left_trim_pos;
788
84
                }
789
43
            }
790
791
68
            if constexpr (is_rtrim) {
792
114
                while (right_trim_pos > left_trim_pos) {
793
100
                    --right_trim_pos;
794
100
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
35
                        ++right_trim_pos;
796
35
                        break;
797
35
                    }
798
100
                }
799
49
            }
800
801
68
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
68
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
68
        }
805
806
68
        return Status::OK();
807
68
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
764
24
                                     ColumnString::Offsets& res_offsets) {
765
24
        const size_t offset_size = str_offsets.size();
766
24
        std::bitset<128> char_lookup;
767
24
        const char* remove_begin = remove_str.data;
768
24
        const char* remove_end = remove_str.data + remove_str.size;
769
770
86
        while (remove_begin < remove_end) {
771
62
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
62
            remove_begin += 1;
773
62
        }
774
775
48
        for (size_t i = 0; i < offset_size; ++i) {
776
24
            const char* str_begin =
777
24
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
24
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
24
            const char* left_trim_pos = str_begin;
780
24
            const char* right_trim_pos = str_end;
781
782
24
            if constexpr (is_ltrim) {
783
57
                while (left_trim_pos < str_end) {
784
50
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
17
                        break;
786
17
                    }
787
33
                    ++left_trim_pos;
788
33
                }
789
24
            }
790
791
24
            if constexpr (is_rtrim) {
792
39
                while (right_trim_pos > left_trim_pos) {
793
32
                    --right_trim_pos;
794
32
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
17
                        ++right_trim_pos;
796
17
                        break;
797
17
                    }
798
32
                }
799
24
            }
800
801
24
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
24
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
24
        }
805
806
24
        return Status::OK();
807
24
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
764
19
                                     ColumnString::Offsets& res_offsets) {
765
19
        const size_t offset_size = str_offsets.size();
766
19
        std::bitset<128> char_lookup;
767
19
        const char* remove_begin = remove_str.data;
768
19
        const char* remove_end = remove_str.data + remove_str.size;
769
770
73
        while (remove_begin < remove_end) {
771
54
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
54
            remove_begin += 1;
773
54
        }
774
775
38
        for (size_t i = 0; i < offset_size; ++i) {
776
19
            const char* str_begin =
777
19
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
19
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
19
            const char* left_trim_pos = str_begin;
780
19
            const char* right_trim_pos = str_end;
781
782
19
            if constexpr (is_ltrim) {
783
70
                while (left_trim_pos < str_end) {
784
64
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
13
                        break;
786
13
                    }
787
51
                    ++left_trim_pos;
788
51
                }
789
19
            }
790
791
            if constexpr (is_rtrim) {
792
                while (right_trim_pos > left_trim_pos) {
793
                    --right_trim_pos;
794
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
                        ++right_trim_pos;
796
                        break;
797
                    }
798
                }
799
            }
800
801
19
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
19
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
19
        }
805
806
19
        return Status::OK();
807
19
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
764
25
                                     ColumnString::Offsets& res_offsets) {
765
25
        const size_t offset_size = str_offsets.size();
766
25
        std::bitset<128> char_lookup;
767
25
        const char* remove_begin = remove_str.data;
768
25
        const char* remove_end = remove_str.data + remove_str.size;
769
770
92
        while (remove_begin < remove_end) {
771
67
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
67
            remove_begin += 1;
773
67
        }
774
775
50
        for (size_t i = 0; i < offset_size; ++i) {
776
25
            const char* str_begin =
777
25
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
25
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
25
            const char* left_trim_pos = str_begin;
780
25
            const char* right_trim_pos = str_end;
781
782
            if constexpr (is_ltrim) {
783
                while (left_trim_pos < str_end) {
784
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
                        break;
786
                    }
787
                    ++left_trim_pos;
788
                }
789
            }
790
791
25
            if constexpr (is_rtrim) {
792
75
                while (right_trim_pos > left_trim_pos) {
793
68
                    --right_trim_pos;
794
68
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
18
                        ++right_trim_pos;
796
18
                        break;
797
18
                    }
798
68
                }
799
25
            }
800
801
25
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
25
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
25
        }
805
806
25
        return Status::OK();
807
25
    }
808
809
    static Status impl_vectors_utf8(const ColumnString::Chars& str_data,
810
                                    const ColumnString::Offsets& str_offsets,
811
                                    const StringRef& remove_str, ColumnString::Chars& res_data,
812
53
                                    ColumnString::Offsets& res_offsets) {
813
53
        const size_t offset_size = str_offsets.size();
814
53
        res_offsets.resize(offset_size);
815
53
        res_data.reserve(str_data.size());
816
817
53
        std::unordered_set<std::string_view> char_lookup;
818
53
        const char* remove_begin = remove_str.data;
819
53
        const char* remove_end = remove_str.data + remove_str.size;
820
821
240
        while (remove_begin < remove_end) {
822
187
            size_t byte_len, char_len;
823
187
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
824
187
                    remove_begin, remove_end, 1);
825
187
            char_lookup.insert(std::string_view(remove_begin, byte_len));
826
187
            remove_begin += byte_len;
827
187
        }
828
829
140
        for (size_t i = 0; i < offset_size; ++i) {
830
87
            const char* str_begin =
831
87
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
832
87
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
833
87
            const char* left_trim_pos = str_begin;
834
87
            const char* right_trim_pos = str_end;
835
836
87
            if constexpr (is_ltrim) {
837
81
                while (left_trim_pos < str_end) {
838
73
                    size_t byte_len, char_len;
839
73
                    std::tie(byte_len, char_len) =
840
73
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
841
73
                                                                                   str_end, 1);
842
73
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
843
73
                        char_lookup.end()) {
844
52
                        break;
845
52
                    }
846
21
                    left_trim_pos += byte_len;
847
21
                }
848
60
            }
849
850
87
            if constexpr (is_rtrim) {
851
88
                while (right_trim_pos > left_trim_pos) {
852
80
                    const char* prev_char_pos = right_trim_pos;
853
156
                    do {
854
156
                        --prev_char_pos;
855
156
                    } while ((*prev_char_pos & 0xC0) == 0x80);
856
80
                    size_t byte_len = right_trim_pos - prev_char_pos;
857
80
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
858
80
                        char_lookup.end()) {
859
52
                        break;
860
52
                    }
861
28
                    right_trim_pos = prev_char_pos;
862
28
                }
863
60
            }
864
865
87
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
866
            // The length of the result of the trim function will never exceed the length of the input.
867
87
            res_offsets[i] = (ColumnString::Offset)res_data.size();
868
87
        }
869
53
        return Status::OK();
870
53
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
812
19
                                    ColumnString::Offsets& res_offsets) {
813
19
        const size_t offset_size = str_offsets.size();
814
19
        res_offsets.resize(offset_size);
815
19
        res_data.reserve(str_data.size());
816
817
19
        std::unordered_set<std::string_view> char_lookup;
818
19
        const char* remove_begin = remove_str.data;
819
19
        const char* remove_end = remove_str.data + remove_str.size;
820
821
84
        while (remove_begin < remove_end) {
822
65
            size_t byte_len, char_len;
823
65
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
824
65
                    remove_begin, remove_end, 1);
825
65
            char_lookup.insert(std::string_view(remove_begin, byte_len));
826
65
            remove_begin += byte_len;
827
65
        }
828
829
52
        for (size_t i = 0; i < offset_size; ++i) {
830
33
            const char* str_begin =
831
33
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
832
33
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
833
33
            const char* left_trim_pos = str_begin;
834
33
            const char* right_trim_pos = str_end;
835
836
33
            if constexpr (is_ltrim) {
837
45
                while (left_trim_pos < str_end) {
838
41
                    size_t byte_len, char_len;
839
41
                    std::tie(byte_len, char_len) =
840
41
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
841
41
                                                                                   str_end, 1);
842
41
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
843
41
                        char_lookup.end()) {
844
29
                        break;
845
29
                    }
846
12
                    left_trim_pos += byte_len;
847
12
                }
848
33
            }
849
850
33
            if constexpr (is_rtrim) {
851
48
                while (right_trim_pos > left_trim_pos) {
852
44
                    const char* prev_char_pos = right_trim_pos;
853
90
                    do {
854
90
                        --prev_char_pos;
855
90
                    } while ((*prev_char_pos & 0xC0) == 0x80);
856
44
                    size_t byte_len = right_trim_pos - prev_char_pos;
857
44
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
858
44
                        char_lookup.end()) {
859
29
                        break;
860
29
                    }
861
15
                    right_trim_pos = prev_char_pos;
862
15
                }
863
33
            }
864
865
33
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
866
            // The length of the result of the trim function will never exceed the length of the input.
867
33
            res_offsets[i] = (ColumnString::Offset)res_data.size();
868
33
        }
869
19
        return Status::OK();
870
19
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
812
17
                                    ColumnString::Offsets& res_offsets) {
813
17
        const size_t offset_size = str_offsets.size();
814
17
        res_offsets.resize(offset_size);
815
17
        res_data.reserve(str_data.size());
816
817
17
        std::unordered_set<std::string_view> char_lookup;
818
17
        const char* remove_begin = remove_str.data;
819
17
        const char* remove_end = remove_str.data + remove_str.size;
820
821
78
        while (remove_begin < remove_end) {
822
61
            size_t byte_len, char_len;
823
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
824
61
                    remove_begin, remove_end, 1);
825
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
826
61
            remove_begin += byte_len;
827
61
        }
828
829
44
        for (size_t i = 0; i < offset_size; ++i) {
830
27
            const char* str_begin =
831
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
832
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
833
27
            const char* left_trim_pos = str_begin;
834
27
            const char* right_trim_pos = str_end;
835
836
27
            if constexpr (is_ltrim) {
837
36
                while (left_trim_pos < str_end) {
838
32
                    size_t byte_len, char_len;
839
32
                    std::tie(byte_len, char_len) =
840
32
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
841
32
                                                                                   str_end, 1);
842
32
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
843
32
                        char_lookup.end()) {
844
23
                        break;
845
23
                    }
846
9
                    left_trim_pos += byte_len;
847
9
                }
848
27
            }
849
850
            if constexpr (is_rtrim) {
851
                while (right_trim_pos > left_trim_pos) {
852
                    const char* prev_char_pos = right_trim_pos;
853
                    do {
854
                        --prev_char_pos;
855
                    } while ((*prev_char_pos & 0xC0) == 0x80);
856
                    size_t byte_len = right_trim_pos - prev_char_pos;
857
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
858
                        char_lookup.end()) {
859
                        break;
860
                    }
861
                    right_trim_pos = prev_char_pos;
862
                }
863
            }
864
865
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
866
            // The length of the result of the trim function will never exceed the length of the input.
867
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
868
27
        }
869
17
        return Status::OK();
870
17
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
812
17
                                    ColumnString::Offsets& res_offsets) {
813
17
        const size_t offset_size = str_offsets.size();
814
17
        res_offsets.resize(offset_size);
815
17
        res_data.reserve(str_data.size());
816
817
17
        std::unordered_set<std::string_view> char_lookup;
818
17
        const char* remove_begin = remove_str.data;
819
17
        const char* remove_end = remove_str.data + remove_str.size;
820
821
78
        while (remove_begin < remove_end) {
822
61
            size_t byte_len, char_len;
823
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
824
61
                    remove_begin, remove_end, 1);
825
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
826
61
            remove_begin += byte_len;
827
61
        }
828
829
44
        for (size_t i = 0; i < offset_size; ++i) {
830
27
            const char* str_begin =
831
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
832
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
833
27
            const char* left_trim_pos = str_begin;
834
27
            const char* right_trim_pos = str_end;
835
836
            if constexpr (is_ltrim) {
837
                while (left_trim_pos < str_end) {
838
                    size_t byte_len, char_len;
839
                    std::tie(byte_len, char_len) =
840
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
841
                                                                                   str_end, 1);
842
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
843
                        char_lookup.end()) {
844
                        break;
845
                    }
846
                    left_trim_pos += byte_len;
847
                }
848
            }
849
850
27
            if constexpr (is_rtrim) {
851
40
                while (right_trim_pos > left_trim_pos) {
852
36
                    const char* prev_char_pos = right_trim_pos;
853
66
                    do {
854
66
                        --prev_char_pos;
855
66
                    } while ((*prev_char_pos & 0xC0) == 0x80);
856
36
                    size_t byte_len = right_trim_pos - prev_char_pos;
857
36
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
858
36
                        char_lookup.end()) {
859
23
                        break;
860
23
                    }
861
13
                    right_trim_pos = prev_char_pos;
862
13
                }
863
27
            }
864
865
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
866
            // The length of the result of the trim function will never exceed the length of the input.
867
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
868
27
        }
869
17
        return Status::OK();
870
17
    }
871
};
872
// This is an implementation of a parameter for the Trim function.
873
template <bool is_ltrim, bool is_rtrim, typename Name>
874
struct Trim1Impl {
875
    static constexpr auto name = Name::name;
876
877
159
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
877
45
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
877
37
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
877
41
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
877
9
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
877
13
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
877
14
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
878
879
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
880
141
                          uint32_t result, size_t input_rows_count) {
881
141
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
141
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
141
            auto col_res = ColumnString::create();
884
141
            char blank[] = " ";
885
141
            const StringRef remove_str(blank, 1);
886
141
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
141
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
141
                    col_res->get_offsets())));
889
141
            block.replace_by_position(result, std::move(col_res));
890
141
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
141
        return Status::OK();
896
141
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
48
                          uint32_t result, size_t input_rows_count) {
881
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
48
            auto col_res = ColumnString::create();
884
48
            char blank[] = " ";
885
48
            const StringRef remove_str(blank, 1);
886
48
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
48
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
48
                    col_res->get_offsets())));
889
48
            block.replace_by_position(result, std::move(col_res));
890
48
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
48
        return Status::OK();
896
48
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
39
                          uint32_t result, size_t input_rows_count) {
881
39
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
39
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
39
            auto col_res = ColumnString::create();
884
39
            char blank[] = " ";
885
39
            const StringRef remove_str(blank, 1);
886
39
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
39
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
39
                    col_res->get_offsets())));
889
39
            block.replace_by_position(result, std::move(col_res));
890
39
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
39
        return Status::OK();
896
39
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
42
                          uint32_t result, size_t input_rows_count) {
881
42
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
42
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
42
            auto col_res = ColumnString::create();
884
42
            char blank[] = " ";
885
42
            const StringRef remove_str(blank, 1);
886
42
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
42
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
42
                    col_res->get_offsets())));
889
42
            block.replace_by_position(result, std::move(col_res));
890
42
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
42
        return Status::OK();
896
42
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
1
                          uint32_t result, size_t input_rows_count) {
881
1
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
1
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
1
            auto col_res = ColumnString::create();
884
1
            char blank[] = " ";
885
1
            const StringRef remove_str(blank, 1);
886
1
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
1
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
1
                    col_res->get_offsets())));
889
1
            block.replace_by_position(result, std::move(col_res));
890
1
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
1
        return Status::OK();
896
1
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
5
                          uint32_t result, size_t input_rows_count) {
881
5
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
5
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
5
            auto col_res = ColumnString::create();
884
5
            char blank[] = " ";
885
5
            const StringRef remove_str(blank, 1);
886
5
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
5
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
5
                    col_res->get_offsets())));
889
5
            block.replace_by_position(result, std::move(col_res));
890
5
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
5
        return Status::OK();
896
5
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
6
                          uint32_t result, size_t input_rows_count) {
881
6
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
6
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
6
            auto col_res = ColumnString::create();
884
6
            char blank[] = " ";
885
6
            const StringRef remove_str(blank, 1);
886
6
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
6
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
6
                    col_res->get_offsets())));
889
6
            block.replace_by_position(result, std::move(col_res));
890
6
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
6
        return Status::OK();
896
6
    }
897
};
898
899
// This is an implementation of two parameters for the Trim function.
900
template <bool is_ltrim, bool is_rtrim, typename Name>
901
struct Trim2Impl {
902
    static constexpr auto name = Name::name;
903
904
226
    static DataTypes get_variadic_argument_types() {
905
226
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
226
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
904
20
    static DataTypes get_variadic_argument_types() {
905
20
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
20
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
904
29
    static DataTypes get_variadic_argument_types() {
905
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
904
84
    static DataTypes get_variadic_argument_types() {
905
84
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
84
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
904
27
    static DataTypes get_variadic_argument_types() {
905
27
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
27
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
904
29
    static DataTypes get_variadic_argument_types() {
905
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
904
37
    static DataTypes get_variadic_argument_types() {
905
37
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
37
    }
907
908
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
909
282
                          uint32_t result, size_t input_rows_count) {
910
282
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
282
        const auto& rcol =
912
282
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
282
                        ->get_data_column_ptr();
914
282
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
282
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
282
                auto col_res = ColumnString::create();
917
282
                const auto* remove_str_raw = col_right->get_chars().data();
918
282
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
282
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
282
                if (remove_str.size == 1) {
922
65
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
65
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
65
                            col_res->get_offsets())));
925
217
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
121
                                  std::is_same<Name, NameRTrimIn>::value) {
929
121
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
121
                                col->get_chars(), col->get_offsets(), remove_str,
931
121
                                col_res->get_chars(), col_res->get_offsets())));
932
121
                    } else {
933
96
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
96
                                col->get_chars(), col->get_offsets(), remove_str,
935
96
                                col_res->get_chars(), col_res->get_offsets())));
936
96
                    }
937
217
                }
938
282
                block.replace_by_position(result, std::move(col_res));
939
18.4E
            } else {
940
18.4E
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
18.4E
                                            block.get_by_position(arguments[1]).column->get_name(),
942
18.4E
                                            name);
943
18.4E
            }
944
945
281
        } else {
946
1
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
1
                                        block.get_by_position(arguments[0]).column->get_name(),
948
1
                                        name);
949
1
        }
950
282
        return Status::OK();
951
282
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
26
                          uint32_t result, size_t input_rows_count) {
910
26
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
26
        const auto& rcol =
912
26
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
26
                        ->get_data_column_ptr();
914
26
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
26
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
26
                auto col_res = ColumnString::create();
917
26
                const auto* remove_str_raw = col_right->get_chars().data();
918
26
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
26
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
26
                if (remove_str.size == 1) {
922
2
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
2
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
2
                            col_res->get_offsets())));
925
24
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
                                  std::is_same<Name, NameRTrimIn>::value) {
929
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
                                col->get_chars(), col->get_offsets(), remove_str,
931
                                col_res->get_chars(), col_res->get_offsets())));
932
24
                    } else {
933
24
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
24
                                col->get_chars(), col->get_offsets(), remove_str,
935
24
                                col_res->get_chars(), col_res->get_offsets())));
936
24
                    }
937
24
                }
938
26
                block.replace_by_position(result, std::move(col_res));
939
26
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
26
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
26
        return Status::OK();
951
26
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
32
                          uint32_t result, size_t input_rows_count) {
910
32
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
32
        const auto& rcol =
912
32
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
32
                        ->get_data_column_ptr();
914
32
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
32
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
32
                auto col_res = ColumnString::create();
917
32
                const auto* remove_str_raw = col_right->get_chars().data();
918
32
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
32
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
32
                if (remove_str.size == 1) {
922
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
5
                            col_res->get_offsets())));
925
27
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
                                  std::is_same<Name, NameRTrimIn>::value) {
929
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
                                col->get_chars(), col->get_offsets(), remove_str,
931
                                col_res->get_chars(), col_res->get_offsets())));
932
27
                    } else {
933
27
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
27
                                col->get_chars(), col->get_offsets(), remove_str,
935
27
                                col_res->get_chars(), col_res->get_offsets())));
936
27
                    }
937
27
                }
938
32
                block.replace_by_position(result, std::move(col_res));
939
18.4E
            } else {
940
18.4E
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
18.4E
                                            block.get_by_position(arguments[1]).column->get_name(),
942
18.4E
                                            name);
943
18.4E
            }
944
945
31
        } else {
946
1
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
1
                                        block.get_by_position(arguments[0]).column->get_name(),
948
1
                                        name);
949
1
        }
950
32
        return Status::OK();
951
32
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
85
                          uint32_t result, size_t input_rows_count) {
910
85
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
85
        const auto& rcol =
912
85
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
85
                        ->get_data_column_ptr();
914
85
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
85
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
85
                auto col_res = ColumnString::create();
917
85
                const auto* remove_str_raw = col_right->get_chars().data();
918
85
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
85
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
85
                if (remove_str.size == 1) {
922
40
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
40
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
40
                            col_res->get_offsets())));
925
45
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
                                  std::is_same<Name, NameRTrimIn>::value) {
929
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
                                col->get_chars(), col->get_offsets(), remove_str,
931
                                col_res->get_chars(), col_res->get_offsets())));
932
45
                    } else {
933
45
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
45
                                col->get_chars(), col->get_offsets(), remove_str,
935
45
                                col_res->get_chars(), col_res->get_offsets())));
936
45
                    }
937
45
                }
938
85
                block.replace_by_position(result, std::move(col_res));
939
85
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
85
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
85
        return Status::OK();
951
85
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
50
                          uint32_t result, size_t input_rows_count) {
910
50
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
50
        const auto& rcol =
912
50
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
50
                        ->get_data_column_ptr();
914
50
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
50
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
50
                auto col_res = ColumnString::create();
917
50
                const auto* remove_str_raw = col_right->get_chars().data();
918
50
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
50
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
50
                if (remove_str.size == 1) {
922
7
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
7
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
7
                            col_res->get_offsets())));
925
43
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
43
                                  std::is_same<Name, NameRTrimIn>::value) {
929
43
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
43
                                col->get_chars(), col->get_offsets(), remove_str,
931
43
                                col_res->get_chars(), col_res->get_offsets())));
932
                    } else {
933
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
                                col->get_chars(), col->get_offsets(), remove_str,
935
                                col_res->get_chars(), col_res->get_offsets())));
936
                    }
937
43
                }
938
50
                block.replace_by_position(result, std::move(col_res));
939
50
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
50
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
50
        return Status::OK();
951
50
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
41
                          uint32_t result, size_t input_rows_count) {
910
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
41
        const auto& rcol =
912
41
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
41
                        ->get_data_column_ptr();
914
41
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
41
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
41
                auto col_res = ColumnString::create();
917
41
                const auto* remove_str_raw = col_right->get_chars().data();
918
41
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
41
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
41
                if (remove_str.size == 1) {
922
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
5
                            col_res->get_offsets())));
925
36
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
36
                                  std::is_same<Name, NameRTrimIn>::value) {
929
36
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
36
                                col->get_chars(), col->get_offsets(), remove_str,
931
36
                                col_res->get_chars(), col_res->get_offsets())));
932
                    } else {
933
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
                                col->get_chars(), col->get_offsets(), remove_str,
935
                                col_res->get_chars(), col_res->get_offsets())));
936
                    }
937
36
                }
938
41
                block.replace_by_position(result, std::move(col_res));
939
41
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
41
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
41
        return Status::OK();
951
41
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
48
                          uint32_t result, size_t input_rows_count) {
910
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
48
        const auto& rcol =
912
48
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
48
                        ->get_data_column_ptr();
914
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
48
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
48
                auto col_res = ColumnString::create();
917
48
                const auto* remove_str_raw = col_right->get_chars().data();
918
48
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
48
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
48
                if (remove_str.size == 1) {
922
6
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
6
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
6
                            col_res->get_offsets())));
925
42
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
42
                                  std::is_same<Name, NameRTrimIn>::value) {
929
42
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
42
                                col->get_chars(), col->get_offsets(), remove_str,
931
42
                                col_res->get_chars(), col_res->get_offsets())));
932
                    } else {
933
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
                                col->get_chars(), col->get_offsets(), remove_str,
935
                                col_res->get_chars(), col_res->get_offsets())));
936
                    }
937
42
                }
938
48
                block.replace_by_position(result, std::move(col_res));
939
48
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
48
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
48
        return Status::OK();
951
48
    }
952
};
953
954
template <typename impl>
955
class FunctionTrim : public IFunction {
956
public:
957
    static constexpr auto name = impl::name;
958
397
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
958
46
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
958
38
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
958
42
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
958
21
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
958
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
958
85
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
958
10
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
958
14
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
958
15
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
958
28
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
958
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
958
38
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
959
12
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
960
961
289
    size_t get_number_of_arguments() const override {
962
289
        return get_variadic_argument_types_impl().size();
963
289
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
37
    size_t get_number_of_arguments() const override {
962
37
        return get_variadic_argument_types_impl().size();
963
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
29
    size_t get_number_of_arguments() const override {
962
29
        return get_variadic_argument_types_impl().size();
963
29
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
33
    size_t get_number_of_arguments() const override {
962
33
        return get_variadic_argument_types_impl().size();
963
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
12
    size_t get_number_of_arguments() const override {
962
12
        return get_variadic_argument_types_impl().size();
963
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
21
    size_t get_number_of_arguments() const override {
962
21
        return get_variadic_argument_types_impl().size();
963
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
76
    size_t get_number_of_arguments() const override {
962
76
        return get_variadic_argument_types_impl().size();
963
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
1
    size_t get_number_of_arguments() const override {
962
1
        return get_variadic_argument_types_impl().size();
963
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
5
    size_t get_number_of_arguments() const override {
962
5
        return get_variadic_argument_types_impl().size();
963
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
6
    size_t get_number_of_arguments() const override {
962
6
        return get_variadic_argument_types_impl().size();
963
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
19
    size_t get_number_of_arguments() const override {
962
19
        return get_variadic_argument_types_impl().size();
963
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
21
    size_t get_number_of_arguments() const override {
962
21
        return get_variadic_argument_types_impl().size();
963
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
29
    size_t get_number_of_arguments() const override {
962
29
        return get_variadic_argument_types_impl().size();
963
29
    }
964
965
289
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
289
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
289
        return arguments[0];
972
289
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
37
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
37
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
37
        return arguments[0];
972
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
29
        return arguments[0];
972
29
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
33
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
33
        return arguments[0];
972
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
12
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
12
        return arguments[0];
972
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
21
        return arguments[0];
972
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
76
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
76
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
76
        return arguments[0];
972
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
1
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
1
        return arguments[0];
972
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
5
        return arguments[0];
972
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
6
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
6
        return arguments[0];
972
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
19
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
19
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
19
        return arguments[0];
972
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
21
        return arguments[0];
972
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
29
        return arguments[0];
972
29
    }
973
    // The second parameter of "trim" is a constant.
974
572
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
85
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
60
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
64
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
37
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
41
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
96
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
1
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
5
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
6
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
67
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
51
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
59
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
975
976
385
    DataTypes get_variadic_argument_types_impl() const override {
977
385
        return impl::get_variadic_argument_types();
978
385
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
45
    DataTypes get_variadic_argument_types_impl() const override {
977
45
        return impl::get_variadic_argument_types();
978
45
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
37
    DataTypes get_variadic_argument_types_impl() const override {
977
37
        return impl::get_variadic_argument_types();
978
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
41
    DataTypes get_variadic_argument_types_impl() const override {
977
41
        return impl::get_variadic_argument_types();
978
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
20
    DataTypes get_variadic_argument_types_impl() const override {
977
20
        return impl::get_variadic_argument_types();
978
20
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
29
    DataTypes get_variadic_argument_types_impl() const override {
977
29
        return impl::get_variadic_argument_types();
978
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
84
    DataTypes get_variadic_argument_types_impl() const override {
977
84
        return impl::get_variadic_argument_types();
978
84
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
9
    DataTypes get_variadic_argument_types_impl() const override {
977
9
        return impl::get_variadic_argument_types();
978
9
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
13
    DataTypes get_variadic_argument_types_impl() const override {
977
13
        return impl::get_variadic_argument_types();
978
13
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
14
    DataTypes get_variadic_argument_types_impl() const override {
977
14
        return impl::get_variadic_argument_types();
978
14
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
27
    DataTypes get_variadic_argument_types_impl() const override {
977
27
        return impl::get_variadic_argument_types();
978
27
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
29
    DataTypes get_variadic_argument_types_impl() const override {
977
29
        return impl::get_variadic_argument_types();
978
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
37
    DataTypes get_variadic_argument_types_impl() const override {
977
37
        return impl::get_variadic_argument_types();
978
37
    }
979
980
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
981
423
                        uint32_t result, size_t input_rows_count) const override {
982
423
        return impl::execute(context, block, arguments, result, input_rows_count);
983
423
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
48
                        uint32_t result, size_t input_rows_count) const override {
982
48
        return impl::execute(context, block, arguments, result, input_rows_count);
983
48
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
39
                        uint32_t result, size_t input_rows_count) const override {
982
39
        return impl::execute(context, block, arguments, result, input_rows_count);
983
39
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
42
                        uint32_t result, size_t input_rows_count) const override {
982
42
        return impl::execute(context, block, arguments, result, input_rows_count);
983
42
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
26
                        uint32_t result, size_t input_rows_count) const override {
982
26
        return impl::execute(context, block, arguments, result, input_rows_count);
983
26
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
32
                        uint32_t result, size_t input_rows_count) const override {
982
32
        return impl::execute(context, block, arguments, result, input_rows_count);
983
32
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
85
                        uint32_t result, size_t input_rows_count) const override {
982
85
        return impl::execute(context, block, arguments, result, input_rows_count);
983
85
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
1
                        uint32_t result, size_t input_rows_count) const override {
982
1
        return impl::execute(context, block, arguments, result, input_rows_count);
983
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
5
                        uint32_t result, size_t input_rows_count) const override {
982
5
        return impl::execute(context, block, arguments, result, input_rows_count);
983
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
6
                        uint32_t result, size_t input_rows_count) const override {
982
6
        return impl::execute(context, block, arguments, result, input_rows_count);
983
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
50
                        uint32_t result, size_t input_rows_count) const override {
982
50
        return impl::execute(context, block, arguments, result, input_rows_count);
983
50
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
41
                        uint32_t result, size_t input_rows_count) const override {
982
41
        return impl::execute(context, block, arguments, result, input_rows_count);
983
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
48
                        uint32_t result, size_t input_rows_count) const override {
982
48
        return impl::execute(context, block, arguments, result, input_rows_count);
983
48
    }
984
};
985
986
struct UnHexImplEmpty {
987
    static constexpr auto name = "unhex";
988
};
989
990
struct UnHexImplNull {
991
    static constexpr auto name = "unhex_null";
992
};
993
994
template <typename Name>
995
struct UnHexImpl {
996
    static constexpr auto name = Name::name;
997
    using ReturnType = DataTypeString;
998
    using ColumnType = ColumnString;
999
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1000
1001
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1002
162
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1003
162
        auto rows_count = offsets.size();
1004
162
        dst_offsets.resize(rows_count);
1005
1006
162
        int64_t total_size = 0;
1007
430
        for (size_t i = 0; i < rows_count; i++) {
1008
268
            size_t len = offsets[i] - offsets[i - 1];
1009
268
            total_size += len / 2;
1010
268
        }
1011
162
        ColumnString::check_chars_length(total_size, rows_count);
1012
162
        dst_data.resize(total_size);
1013
162
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1014
162
        size_t offset = 0;
1015
1016
430
        for (int i = 0; i < rows_count; ++i) {
1017
268
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1018
268
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1019
1020
268
            if (UNLIKELY(srclen == 0)) {
1021
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1022
13
                continue;
1023
13
            }
1024
1025
255
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1026
1027
255
            offset += outlen;
1028
255
            dst_offsets[i] = cast_set<uint32_t>(offset);
1029
255
        }
1030
162
        dst_data.pop_back(total_size - offset);
1031
162
        return Status::OK();
1032
162
    }
1033
1034
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1035
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1036
33
                         ColumnUInt8::Container* null_map_data) {
1037
33
        auto rows_count = offsets.size();
1038
33
        dst_offsets.resize(rows_count);
1039
1040
33
        int64_t total_size = 0;
1041
84
        for (size_t i = 0; i < rows_count; i++) {
1042
51
            size_t len = offsets[i] - offsets[i - 1];
1043
51
            total_size += len / 2;
1044
51
        }
1045
33
        ColumnString::check_chars_length(total_size, rows_count);
1046
33
        dst_data.resize(total_size);
1047
33
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1048
33
        size_t offset = 0;
1049
1050
84
        for (int i = 0; i < rows_count; ++i) {
1051
51
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1052
51
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1053
1054
51
            if (UNLIKELY(srclen == 0)) {
1055
7
                (*null_map_data)[i] = 1;
1056
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1057
7
                continue;
1058
7
            }
1059
1060
44
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1061
1062
44
            if (outlen == 0) {
1063
13
                (*null_map_data)[i] = 1;
1064
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1065
13
                continue;
1066
13
            }
1067
1068
31
            offset += outlen;
1069
31
            dst_offsets[i] = cast_set<uint32_t>(offset);
1070
31
        }
1071
33
        dst_data.pop_back(total_size - offset);
1072
33
        return Status::OK();
1073
33
    }
1074
};
1075
1076
struct NameStringSpace {
1077
    static constexpr auto name = "space";
1078
};
1079
1080
struct StringSpace {
1081
    using ReturnType = DataTypeString;
1082
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_INT;
1083
    using Type = Int32;
1084
    using ReturnColumnType = ColumnString;
1085
1086
    static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data,
1087
10
                         ColumnString::Offsets& res_offsets) {
1088
10
        res_offsets.resize(data.size());
1089
10
        size_t input_size = res_offsets.size();
1090
10
        int64_t total_size = 0;
1091
34
        for (size_t i = 0; i < input_size; ++i) {
1092
24
            if (data[i] > 0) {
1093
14
                total_size += data[i];
1094
14
            }
1095
24
        }
1096
10
        ColumnString::check_chars_length(total_size, input_size);
1097
10
        res_data.reserve(total_size);
1098
1099
34
        for (size_t i = 0; i < input_size; ++i) {
1100
24
            if (data[i] > 0) [[likely]] {
1101
14
                res_data.resize_fill(res_data.size() + data[i], ' ');
1102
14
                cast_set(res_offsets[i], res_data.size());
1103
14
            } else {
1104
10
                StringOP::push_empty_string(i, res_data, res_offsets);
1105
10
            }
1106
24
        }
1107
10
        return Status::OK();
1108
10
    }
1109
};
1110
1111
struct ToBase64Impl {
1112
    static constexpr auto name = "to_base64";
1113
    using ReturnType = DataTypeString;
1114
    using ColumnType = ColumnString;
1115
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1116
1117
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1118
109
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1119
109
        auto rows_count = offsets.size();
1120
109
        dst_offsets.resize(rows_count);
1121
1122
109
        size_t total_size = 0;
1123
266
        for (size_t i = 0; i < rows_count; i++) {
1124
157
            size_t len = offsets[i] - offsets[i - 1];
1125
157
            total_size += 4 * ((len + 2) / 3);
1126
157
        }
1127
109
        ColumnString::check_chars_length(total_size, rows_count);
1128
109
        dst_data.resize(total_size);
1129
109
        auto* dst_data_ptr = dst_data.data();
1130
109
        size_t offset = 0;
1131
1132
266
        for (int i = 0; i < rows_count; ++i) {
1133
157
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1134
157
            size_t srclen = offsets[i] - offsets[i - 1];
1135
1136
157
            if (UNLIKELY(srclen == 0)) {
1137
9
                dst_offsets[i] = cast_set<uint32_t>(offset);
1138
9
                continue;
1139
9
            }
1140
1141
148
            auto outlen = doris::base64_encode((const unsigned char*)source, srclen,
1142
148
                                               (unsigned char*)(dst_data_ptr + offset));
1143
1144
148
            offset += outlen;
1145
148
            dst_offsets[i] = cast_set<uint32_t>(offset);
1146
148
        }
1147
109
        dst_data.pop_back(total_size - offset);
1148
109
        return Status::OK();
1149
109
    }
1150
};
1151
1152
struct FromBase64Impl {
1153
    static constexpr auto name = "from_base64";
1154
    using ReturnType = DataTypeString;
1155
    using ColumnType = ColumnString;
1156
1157
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1158
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1159
111
                         NullMap& null_map) {
1160
111
        auto rows_count = offsets.size();
1161
111
        dst_offsets.resize(rows_count);
1162
1163
111
        size_t total_size = 0;
1164
287
        for (size_t i = 0; i < rows_count; i++) {
1165
176
            auto len = offsets[i] - offsets[i - 1];
1166
176
            total_size += len / 4 * 3;
1167
176
        }
1168
111
        ColumnString::check_chars_length(total_size, rows_count);
1169
111
        dst_data.resize(total_size);
1170
111
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1171
111
        size_t offset = 0;
1172
1173
287
        for (int i = 0; i < rows_count; ++i) {
1174
176
            if (UNLIKELY(null_map[i])) {
1175
0
                null_map[i] = 1;
1176
0
                dst_offsets[i] = cast_set<uint32_t>(offset);
1177
0
                continue;
1178
0
            }
1179
1180
176
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1181
176
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1182
1183
176
            if (UNLIKELY(srclen == 0)) {
1184
8
                dst_offsets[i] = cast_set<uint32_t>(offset);
1185
8
                continue;
1186
8
            }
1187
1188
168
            auto outlen = base64_decode(source, srclen, dst_data_ptr + offset);
1189
1190
168
            if (outlen < 0) {
1191
68
                null_map[i] = 1;
1192
68
                dst_offsets[i] = cast_set<uint32_t>(offset);
1193
100
            } else {
1194
100
                offset += outlen;
1195
100
                dst_offsets[i] = cast_set<uint32_t>(offset);
1196
100
            }
1197
168
        }
1198
111
        dst_data.pop_back(total_size - offset);
1199
111
        return Status::OK();
1200
111
    }
1201
};
1202
1203
struct StringAppendTrailingCharIfAbsent {
1204
    static constexpr auto name = "append_trailing_char_if_absent";
1205
    using Chars = ColumnString::Chars;
1206
    using Offsets = ColumnString::Offsets;
1207
    using ReturnType = DataTypeString;
1208
    using ColumnType = ColumnString;
1209
1210
68
    static bool str_end_with(const StringRef& str, const StringRef& end) {
1211
68
        if (str.size < end.size) {
1212
11
            return false;
1213
11
        }
1214
        // The end_with method of StringRef needs to ensure that the size of end is less than or equal to the size of str.
1215
57
        return str.end_with(end);
1216
68
    }
1217
1218
    static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1219
                              const Chars& rdata, const Offsets& roffsets, Chars& res_data,
1220
56
                              Offsets& res_offsets, NullMap& null_map_data) {
1221
56
        DCHECK_EQ(loffsets.size(), roffsets.size());
1222
56
        size_t input_rows_count = loffsets.size();
1223
56
        res_offsets.resize(input_rows_count);
1224
56
        fmt::memory_buffer buffer;
1225
1226
158
        for (size_t i = 0; i < input_rows_count; ++i) {
1227
102
            buffer.clear();
1228
1229
102
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1230
102
                                       loffsets[i] - loffsets[i - 1]);
1231
102
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1232
102
                                       roffsets[i] - roffsets[i - 1]);
1233
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1234
102
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1235
102
                    rstr.begin(), rstr.end(), 2);
1236
1237
102
            if (char_len != 1) {
1238
66
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1239
66
                continue;
1240
66
            }
1241
36
            if (str_end_with(lstr, rstr)) {
1242
9
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1243
9
                continue;
1244
9
            }
1245
1246
27
            buffer.append(lstr.begin(), lstr.end());
1247
27
            buffer.append(rstr.begin(), rstr.end());
1248
27
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1249
27
                                        res_offsets);
1250
27
        }
1251
56
    }
1252
    static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1253
                              const StringRef& rstr, Chars& res_data, Offsets& res_offsets,
1254
10
                              NullMap& null_map_data) {
1255
10
        size_t input_rows_count = loffsets.size();
1256
10
        res_offsets.resize(input_rows_count);
1257
10
        fmt::memory_buffer buffer;
1258
        // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1259
10
        auto [byte_len, char_len] =
1260
10
                simd::VStringFunctions::iterate_utf8_with_limit_length(rstr.begin(), rstr.end(), 2);
1261
10
        if (char_len != 1) {
1262
4
            for (size_t i = 0; i < input_rows_count; ++i) {
1263
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1264
2
            }
1265
2
            return;
1266
2
        }
1267
1268
34
        for (size_t i = 0; i < input_rows_count; ++i) {
1269
26
            buffer.clear();
1270
26
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1271
26
                                       loffsets[i] - loffsets[i - 1]);
1272
1273
26
            if (str_end_with(lstr, rstr)) {
1274
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1275
2
                continue;
1276
2
            }
1277
1278
24
            buffer.append(lstr.begin(), lstr.end());
1279
24
            buffer.append(rstr.begin(), rstr.end());
1280
24
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1281
24
                                        res_offsets);
1282
24
        }
1283
8
    }
1284
    static void scalar_vector(FunctionContext* context, const StringRef& lstr, const Chars& rdata,
1285
                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
1286
8
                              NullMap& null_map_data) {
1287
8
        size_t input_rows_count = roffsets.size();
1288
8
        res_offsets.resize(input_rows_count);
1289
8
        fmt::memory_buffer buffer;
1290
1291
16
        for (size_t i = 0; i < input_rows_count; ++i) {
1292
8
            buffer.clear();
1293
1294
8
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1295
8
                                       roffsets[i] - roffsets[i - 1]);
1296
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1297
8
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1298
8
                    rstr.begin(), rstr.end(), 2);
1299
1300
8
            if (char_len != 1) {
1301
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1302
2
                continue;
1303
2
            }
1304
6
            if (str_end_with(lstr, rstr)) {
1305
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1306
2
                continue;
1307
2
            }
1308
1309
4
            buffer.append(lstr.begin(), lstr.end());
1310
4
            buffer.append(rstr.begin(), rstr.end());
1311
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1312
4
                                        res_offsets);
1313
4
        }
1314
8
    }
1315
};
1316
1317
struct StringLPad {
1318
    static constexpr auto name = "lpad";
1319
    static constexpr auto is_lpad = true;
1320
};
1321
1322
// Trait struct passed as the template argument of FunctionStringPad (see FunctionStringRPad):
// carries the name "rpad" and is_lpad = false.
// NOTE(review): is_lpad = false presumably selects right-side padding inside FunctionStringPad,
// which is defined outside this view - confirm there.
struct StringRPad {
1323
    static constexpr auto name = "rpad";
1324
    static constexpr auto is_lpad = false;
1325
};
1326
1327
// Two-parameter alias templates that fix the third argument of StringFunctionImpl to a specific
// operation (StartsWithOp / EndsWithOp / FindInSetOp). They are passed as the implementation
// template argument of FunctionBinaryToType in the aliases further down.
template <typename LeftDataType, typename RightDataType>
1328
using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;
1329
1330
template <typename LeftDataType, typename RightDataType>
1331
using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;
1332
1333
template <typename LeftDataType, typename RightDataType>
1334
using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
1335
1336
// Concrete function types; each alias below is registered in register_function_string().
// Unary functions: FunctionUnaryToType<implementation, name tag>.
1337
using FunctionStringParseDataSize = FunctionUnaryToType<ParseDataSize, NameParseDataSize>;
1338
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
1339
using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLength>;
1340
using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
1341
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
1342
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
1343
using FunctionIsValidUTF8 = FunctionUnaryToType<IsValidUTF8Impl, NameIsValidUTF8>;
// Binary (string, string) functions: FunctionBinaryToType<left type, right type, impl, name tag>.
1344
using FunctionStringStartsWith =
1345
        FunctionBinaryToType<DataTypeString, DataTypeString, StringStartsWithImpl, NameStartsWith>;
1346
using FunctionStringEndsWith =
1347
        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
1348
using FunctionStringInstr =
1349
        FunctionBinaryToType<DataTypeString, DataTypeString, StringInStrImpl, NameInstr>;
1350
using FunctionStringLocate =
1351
        FunctionBinaryToType<DataTypeString, DataTypeString, StringLocateImpl, NameLocate>;
1352
using FunctionStringFindInSet =
1353
        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;
1354
// String-to-string functions: FunctionStringToString<implementation, name tag>.
1355
using FunctionQuote = FunctionStringToString<NameQuoteImpl, NameQuote>;
1356
1357
using FunctionToLower = FunctionStringToString<TransferImpl<NameToLower>, NameToLower>;
1358
1359
using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameToUpper>;
1360
1361
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
1362
// Hex / base64 functions. NOTE(review): the bool argument of FunctionStringEncode differs only
// for the UnHexImplNull variant - presumably it marks a nullable result; confirm at its definition.
1363
using FunctionUnHex = FunctionStringEncode<UnHexImpl<UnHexImplEmpty>, false>;
1364
using FunctionUnHexNullable = FunctionStringEncode<UnHexImpl<UnHexImplNull>, true>;
1365
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>;
1366
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
1367
1368
using FunctionStringAppendTrailingCharIfAbsent =
1369
        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;
1370
// lpad / rpad share FunctionStringPad and differ only in the trait struct.
1371
using FunctionStringLPad = FunctionStringPad<StringLPad>;
1372
using FunctionStringRPad = FunctionStringPad<StringRPad>;
1373
1374
// Forward declarations of the per-category registrars called at the top of
// register_function_string(). Their definitions are not in the visible part of this file -
// presumably they live in sibling source files.
extern void register_function_string_basic(SimpleFunctionFactory& factory);
1375
extern void register_function_string_digest(SimpleFunctionFactory& factory);
1376
extern void register_function_string_mask(SimpleFunctionFactory& factory);
1377
extern void register_function_string_misc(SimpleFunctionFactory& factory);
1378
extern void register_function_string_search(SimpleFunctionFactory& factory);
1379
extern void register_function_string_url(SimpleFunctionFactory& factory);
1380
1381
8
// Registers the string functions with `factory` in three steps: it calls the six
// register_function_string_* registrars, registers the individual function types listed
// below, and finally adds alias names for some of them.
void register_function_string(SimpleFunctionFactory& factory) {
1382
8
    // Step 1: per-category registrars.
    register_function_string_basic(factory);
1383
8
    register_function_string_digest(factory);
1384
8
    register_function_string_mask(factory);
1385
8
    register_function_string_misc(factory);
1386
8
    register_function_string_search(factory);
1387
8
    register_function_string_url(factory);
1388
1389
8
    // Step 2: individual function types.
    factory.register_function<FunctionStringParseDataSize>();
1390
8
    factory.register_function<FunctionStringASCII>();
1391
8
    factory.register_function<FunctionStringLength>();
1392
8
    factory.register_function<FunctionCrc32>();
1393
8
    factory.register_function<FunctionStringUTF8Length>();
1394
8
    factory.register_function<FunctionStringSpace>();
1395
8
    factory.register_function<FunctionStringStartsWith>();
1396
8
    factory.register_function<FunctionStringEndsWith>();
1397
8
    factory.register_function<FunctionStringInstr>();
1398
8
    factory.register_function<FunctionStringFindInSet>();
1399
8
    factory.register_function<FunctionStringLocate>();
1400
8
    factory.register_function<FunctionQuote>();
1401
8
    factory.register_function<FunctionReverseCommon>();
1402
8
    factory.register_function<FunctionUnHex>();
1403
8
    factory.register_function<FunctionUnHexNullable>();
1404
8
    factory.register_function<FunctionToLower>();
1405
8
    factory.register_function<FunctionToUpper>();
1406
8
    factory.register_function<FunctionToInitcap>();
1407
8
    // Trim family: Trim1Impl and Trim2Impl, each with the flag pairs (true, true),
    // (true, false), (false, true), for NameTrim/NameLTrim/NameRTrim and then for the
    // NameTrimIn/NameLTrimIn/NameRTrimIn names.
    // NOTE(review): Trim1Impl / Trim2Impl are presumably the one- and two-argument forms - confirm.
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrim>>>();
1408
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrim>>>();
1409
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrim>>>();
1410
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrim>>>();
1411
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrim>>>();
1412
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrim>>>();
1413
8
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrimIn>>>();
1414
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrimIn>>>();
1415
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrimIn>>>();
1416
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrimIn>>>();
1417
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrimIn>>>();
1418
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrimIn>>>();
1419
8
    // Concatenation, repetition and padding.
    factory.register_function<FunctionStringConcat>();
1420
8
    factory.register_function<FunctionStringElt>();
1421
8
    factory.register_function<FunctionStringConcatWs>();
1422
8
    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
1423
8
    factory.register_function<FunctionStringRepeat>();
1424
8
    factory.register_function<FunctionStringLPad>();
1425
8
    factory.register_function<FunctionStringRPad>();
1426
8
    factory.register_function<FunctionToBase64>();
1427
8
    factory.register_function<FunctionFromBase64>();
1428
8
    // FunctionMoneyFormat: one registration per input implementation (double, int64, int128
    // and the five decimal types).
    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
1429
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
1430
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
1431
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMALV2>>>();
1432
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL32>>>();
1433
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL64>>>();
1434
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL128I>>>();
1435
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL256>>>();
1436
8
    // FunctionStringFormatRound: same set of input implementations as FunctionMoneyFormat.
    factory.register_function<FunctionStringFormatRound<FormatRoundDoubleImpl>>();
1437
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt64Impl>>();
1438
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt128Impl>>();
1439
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMALV2>>>();
1440
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL32>>>();
1441
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL64>>>();
1442
8
    factory.register_function<
1443
8
            FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL128I>>>();
1444
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL256>>>();
1445
8
    // Replace, sub-replace (three- and four-argument implementations), overlay, UTF-8 validation.
    factory.register_function<FunctionReplace<ReplaceImpl, true>>();
1446
8
    factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
1447
8
    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
1448
8
    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
1449
8
    factory.register_function<FunctionOverlay>();
1450
8
    factory.register_function<FunctionIsValidUTF8>();
1451
1452
8
    // Step 3: alias names. NOTE(review): the argument order looks like (existing function name,
    // alias) - confirm against SimpleFunctionFactory::register_alias.
    factory.register_alias(FunctionIsValidUTF8::name, "isValidUTF8");
1453
8
    factory.register_alias(FunctionToLower::name, "lcase");
1454
8
    factory.register_alias(FunctionToUpper::name, "ucase");
1455
8
    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
1456
8
    factory.register_alias(FunctionStringLength::name, "octet_length");
1457
8
    factory.register_alias(FunctionOverlay::name, "insert");
1458
8
}
1459
1460
} // namespace doris