Coverage Report

Created: 2026-04-22 09:47

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <ctype.h>
19
#include <math.h>
20
#include <re2/stringpiece.h>
21
#include <unicode/schriter.h>
22
#include <unicode/uchar.h>
23
#include <unicode/unistr.h>
24
#include <unicode/ustream.h>
25
26
#include <bitset>
27
#include <cstddef>
28
#include <cstdint>
29
#include <string_view>
30
31
#include "common/cast_set.h"
32
#include "common/status.h"
33
#include "core/column/column.h"
34
#include "core/column/column_string.h"
35
#include "core/pod_array_fwd.h"
36
#include "core/string_ref.h"
37
#include "exprs/function/function_reverse.h"
38
#include "exprs/function/function_string_concat.h"
39
#include "exprs/function/function_string_format.h"
40
#include "exprs/function/function_string_replace.h"
41
#include "exprs/function/function_string_to_string.h"
42
#include "exprs/function/function_totype.h"
43
#include "exprs/function/simple_function_factory.h"
44
#include "exprs/function/string_hex_util.h"
45
#include "util/string_search.hpp"
46
#include "util/url_coding.h"
47
#include "util/utf8_check.h"
48
49
namespace doris {
50
struct NameStringASCII {
51
    static constexpr auto name = "ascii";
52
};
53
54
struct StringASCII {
55
    using ReturnType = DataTypeInt32;
56
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
57
    using Type = String;
58
    using ReturnColumnType = ColumnInt32;
59
60
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
61
54
                         PaddedPODArray<Int32>& res) {
62
54
        auto size = offsets.size();
63
54
        res.resize(size);
64
152
        for (int i = 0; i < size; ++i) {
65
98
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
66
98
            res[i] = (offsets[i] == offsets[i - 1]) ? 0 : static_cast<uint8_t>(raw_str[0]);
67
98
        }
68
54
        return Status::OK();
69
54
    }
70
};
71
72
struct NameParseDataSize {
73
    static constexpr auto name = "parse_data_size";
74
};
75
76
static const std::map<std::string_view, Int128> UNITS = {
77
        {"B", static_cast<Int128>(1)},        {"kB", static_cast<Int128>(1) << 10},
78
        {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30},
79
        {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50},
80
        {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70},
81
        {"YB", static_cast<Int128>(1) << 80}};
82
83
struct ParseDataSize {
84
    using ReturnType = DataTypeInt128;
85
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
86
    using Type = String;
87
    using ReturnColumnType = ColumnInt128;
88
89
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
90
47
                         PaddedPODArray<Int128>& res) {
91
47
        auto size = offsets.size();
92
47
        res.resize(size);
93
99
        for (int i = 0; i < size; ++i) {
94
52
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
95
52
            int str_size = offsets[i] - offsets[i - 1];
96
52
            res[i] = parse_data_size(std::string_view(raw_str, str_size));
97
52
        }
98
47
        return Status::OK();
99
47
    }
100
101
52
    static Int128 parse_data_size(const std::string_view& dataSize) {
102
52
        int digit_length = 0;
103
216
        for (char c : dataSize) {
104
216
            if (isdigit(c) || c == '.') {
105
166
                digit_length++;
106
166
            } else {
107
50
                break;
108
50
            }
109
216
        }
110
111
52
        if (digit_length == 0) {
112
4
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
113
4
                                   "Invalid Input argument \"{}\" of function parse_data_size",
114
4
                                   dataSize);
115
4
        }
116
        // 123.45MB--->123.45 : MB
117
48
        double value = 0.0;
118
48
        try {
119
48
            value = std::stod(std::string(dataSize.substr(0, digit_length)));
120
48
        } catch (const std::exception& e) {
121
0
            throw doris::Exception(
122
0
                    ErrorCode::INVALID_ARGUMENT,
123
0
                    "Invalid Input argument \"{}\" of function parse_data_size, error: {}",
124
0
                    dataSize, e.what());
125
0
        }
126
48
        auto unit = dataSize.substr(digit_length);
127
48
        auto it = UNITS.find(unit);
128
48
        if (it != UNITS.end()) {
129
45
            return static_cast<__int128>(static_cast<long double>(it->second) * value);
130
45
        } else {
131
3
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
132
3
                                   "Invalid Input argument \"{}\" of function parse_data_size",
133
3
                                   dataSize);
134
3
        }
135
48
    }
136
};
137
138
struct NameQuote {
139
    static constexpr auto name = "quote";
140
};
141
142
struct NameQuoteImpl {
143
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
144
17
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
145
17
        size_t offset_size = offsets.size();
146
17
        ColumnString::Offset pos = 0;
147
17
        res_offsets.resize(offset_size);
148
17
        res_data.resize(data.size() + offset_size * 2);
149
45
        for (int i = 0; i < offset_size; i++) {
150
28
            const unsigned char* raw_str = &data[offsets[i - 1]];
151
28
            ColumnString::Offset size = offsets[i] - offsets[i - 1];
152
28
            res_data[pos] = '\'';
153
28
            std::memcpy(res_data.data() + pos + 1, raw_str, size);
154
28
            res_data[pos + size + 1] = '\'';
155
28
            pos += size + 2;
156
28
            res_offsets[i] = pos;
157
28
        }
158
17
        return Status::OK();
159
17
    }
160
};
161
162
struct NameStringLength {
163
    static constexpr auto name = "length";
164
};
165
166
struct StringLengthImpl {
167
    using ReturnType = DataTypeInt32;
168
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
169
    using Type = String;
170
    using ReturnColumnType = ColumnInt32;
171
172
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
173
5.71k
                         PaddedPODArray<Int32>& res) {
174
5.71k
        auto size = offsets.size();
175
5.71k
        res.resize(size);
176
4.37M
        for (int i = 0; i < size; ++i) {
177
4.37M
            int str_size = offsets[i] - offsets[i - 1];
178
4.37M
            res[i] = str_size;
179
4.37M
        }
180
5.71k
        return Status::OK();
181
5.71k
    }
182
};
183
184
struct NameCrc32 {
185
    static constexpr auto name = "crc32";
186
};
187
188
struct Crc32Impl {
189
    using ReturnType = DataTypeInt64;
190
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
191
    using Type = String;
192
    using ReturnColumnType = ColumnInt64;
193
194
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
195
3
                         PaddedPODArray<Int64>& res) {
196
3
        auto size = offsets.size();
197
3
        res.resize(size);
198
6
        for (int i = 0; i < size; ++i) {
199
3
            res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1],
200
3
                             offsets[i] - offsets[i - 1]);
201
3
        }
202
3
        return Status::OK();
203
3
    }
204
};
205
206
struct NameStringUtf8Length {
207
    static constexpr auto name = "char_length";
208
};
209
210
struct StringUtf8LengthImpl {
211
    using ReturnType = DataTypeInt32;
212
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
213
    using Type = String;
214
    using ReturnColumnType = ColumnInt32;
215
216
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
217
50
                         PaddedPODArray<Int32>& res) {
218
50
        auto size = offsets.size();
219
50
        res.resize(size);
220
144
        for (int i = 0; i < size; ++i) {
221
94
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
222
94
            int str_size = offsets[i] - offsets[i - 1];
223
94
            res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size);
224
94
        }
225
50
        return Status::OK();
226
50
    }
227
};
228
229
struct NameIsValidUTF8 {
230
    static constexpr auto name = "is_valid_utf8";
231
};
232
233
struct IsValidUTF8Impl {
234
    using ReturnType = DataTypeUInt8;
235
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
236
    using Type = String;
237
    using ReturnColumnType = ColumnUInt8;
238
239
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
240
39
                         PaddedPODArray<UInt8>& res) {
241
39
        auto size = offsets.size();
242
39
        res.resize(size);
243
98
        for (size_t i = 0; i < size; ++i) {
244
59
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
245
59
            size_t str_size = offsets[i] - offsets[i - 1];
246
59
            res[i] = validate_utf8(raw_str, str_size) ? 1 : 0;
247
59
        }
248
39
        return Status::OK();
249
39
    }
250
};
251
252
struct NameStartsWith {
253
    static constexpr auto name = "starts_with";
254
};
255
256
struct StartsWithOp {
257
    using ResultDataType = DataTypeUInt8;
258
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
259
260
135
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
261
135
        res = strl.starts_with(strr);
262
135
    }
263
};
264
265
struct NameEndsWith {
266
    static constexpr auto name = "ends_with";
267
};
268
269
struct EndsWithOp {
270
    using ResultDataType = DataTypeUInt8;
271
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
272
273
142
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
274
142
        res = strl.ends_with(strr);
275
142
    }
276
};
277
278
struct NameFindInSet {
279
    static constexpr auto name = "find_in_set";
280
};
281
282
struct FindInSetOp {
283
    using ResultDataType = DataTypeInt32;
284
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
285
170
    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
286
670
        for (const auto& c : strl) {
287
670
            if (c == ',') {
288
21
                res = 0;
289
21
                return;
290
21
            }
291
670
        }
292
293
149
        int32_t token_index = 1;
294
149
        int32_t start = 0;
295
149
        int32_t end;
296
297
253
        do {
298
253
            end = start;
299
            // Position end.
300
1.05k
            while (end < strr.length() && strr[end] != ',') {
301
806
                ++end;
302
806
            }
303
304
253
            if (strl == std::string_view {strr.data() + start, (size_t)end - start}) {
305
93
                res = token_index;
306
93
                return;
307
93
            }
308
309
            // Re-position start and end past ','
310
160
            start = end + 1;
311
160
            ++token_index;
312
160
        } while (start < strr.length());
313
56
        res = 0;
314
56
    }
315
};
316
317
struct NameInstr {
318
    static constexpr auto name = "instr";
319
};
320
321
// LeftDataType and RightDataType are DataTypeString
322
template <typename LeftDataType, typename RightDataType>
323
struct StringInStrImpl {
324
    using ResultDataType = DataTypeInt32;
325
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
326
327
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
328
72
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
329
72
        StringRef lstr_ref(ldata.data, ldata.size);
330
331
72
        auto size = roffsets.size();
332
72
        res.resize(size);
333
144
        for (int i = 0; i < size; ++i) {
334
72
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
335
72
            int r_str_size = roffsets[i] - roffsets[i - 1];
336
337
72
            StringRef rstr_ref(r_raw_str, r_str_size);
338
339
72
            res[i] = execute(lstr_ref, rstr_ref);
340
72
        }
341
342
72
        return Status::OK();
343
72
    }
344
345
    static Status vector_scalar(const ColumnString::Chars& ldata,
346
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
347
86
                                ResultPaddedPODArray& res) {
348
86
        auto size = loffsets.size();
349
86
        res.resize(size);
350
351
86
        if (rdata.size == 0) {
352
12
            std::fill(res.begin(), res.end(), 1);
353
12
            return Status::OK();
354
12
        }
355
356
74
        const UInt8* begin = ldata.data();
357
74
        const UInt8* end = begin + ldata.size();
358
74
        const UInt8* pos = begin;
359
360
        /// Current index in the array of strings.
361
74
        size_t i = 0;
362
74
        std::fill(res.begin(), res.end(), 0);
363
364
74
        StringRef rstr_ref(rdata.data, rdata.size);
365
74
        StringSearch search(&rstr_ref);
366
367
90
        while (pos < end) {
368
            // search return matched substring start offset
369
64
            pos = (UInt8*)search.search((char*)pos, end - pos);
370
64
            if (pos >= end) {
371
48
                break;
372
48
            }
373
374
            /// Determine which index it refers to.
375
            /// begin + value_offsets[i] is the start offset of string at i+1
376
16
            while (begin + loffsets[i] < pos) {
377
0
                ++i;
378
0
            }
379
380
            /// We check that the entry does not pass through the boundaries of strings.
381
16
            if (pos + rdata.size <= begin + loffsets[i]) {
382
16
                int loc = (int)(pos - begin) - loffsets[i - 1];
383
16
                int l_str_size = loffsets[i] - loffsets[i - 1];
384
16
                auto len = std::min(l_str_size, loc);
385
16
                loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len);
386
16
                res[i] = loc + 1;
387
16
            }
388
389
            // move to next string offset
390
16
            pos = begin + loffsets[i];
391
16
            ++i;
392
16
        }
393
394
74
        return Status::OK();
395
86
    }
396
397
    static Status vector_vector(const ColumnString::Chars& ldata,
398
                                const ColumnString::Offsets& loffsets,
399
                                const ColumnString::Chars& rdata,
400
207
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
401
207
        DCHECK_EQ(loffsets.size(), roffsets.size());
402
403
207
        auto size = loffsets.size();
404
207
        res.resize(size);
405
661
        for (int i = 0; i < size; ++i) {
406
454
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
407
454
            int l_str_size = loffsets[i] - loffsets[i - 1];
408
454
            StringRef lstr_ref(l_raw_str, l_str_size);
409
410
454
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
411
454
            int r_str_size = roffsets[i] - roffsets[i - 1];
412
454
            StringRef rstr_ref(r_raw_str, r_str_size);
413
414
454
            res[i] = execute(lstr_ref, rstr_ref);
415
454
        }
416
417
207
        return Status::OK();
418
207
    }
419
420
526
    static int execute(const StringRef& strl, const StringRef& strr) {
421
526
        if (strr.size == 0) {
422
71
            return 1;
423
71
        }
424
425
455
        StringSearch search(&strr);
426
        // Hive returns positions starting from 1.
427
455
        int loc = search.search(&strl);
428
455
        if (loc > 0) {
429
43
            int len = std::min(loc, (int)strl.size);
430
43
            loc = simd::VStringFunctions::get_char_len(strl.data, len);
431
43
        }
432
433
455
        return loc + 1;
434
526
    }
435
};
436
437
// the same impl as instr
438
struct NameLocate {
439
    static constexpr auto name = "locate";
440
};
441
442
// LeftDataType and RightDataType are DataTypeString
443
template <typename LeftDataType, typename RightDataType>
444
struct StringLocateImpl {
445
    using ResultDataType = DataTypeInt32;
446
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
447
448
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
449
38
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
450
38
        return StringInStrImpl<LeftDataType, RightDataType>::vector_scalar(rdata, roffsets, ldata,
451
38
                                                                           res);
452
38
    }
453
454
    static Status vector_scalar(const ColumnString::Chars& ldata,
455
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
456
36
                                ResultPaddedPODArray& res) {
457
36
        return StringInStrImpl<LeftDataType, RightDataType>::scalar_vector(rdata, ldata, loffsets,
458
36
                                                                           res);
459
36
    }
460
461
    static Status vector_vector(const ColumnString::Chars& ldata,
462
                                const ColumnString::Offsets& loffsets,
463
                                const ColumnString::Chars& rdata,
464
126
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
465
126
        return StringInStrImpl<LeftDataType, RightDataType>::vector_vector(rdata, roffsets, ldata,
466
126
                                                                           loffsets, res);
467
126
    }
468
};
469
470
// LeftDataType and RightDataType are DataTypeString
471
template <typename LeftDataType, typename RightDataType, typename OP>
472
struct StringFunctionImpl {
473
    using ResultDataType = typename OP::ResultDataType;
474
    using ResultPaddedPODArray = typename OP::ResultPaddedPODArray;
475
476
    static Status vector_vector(const ColumnString::Chars& ldata,
477
                                const ColumnString::Offsets& loffsets,
478
                                const ColumnString::Chars& rdata,
479
213
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
213
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
213
        auto size = loffsets.size();
483
213
        res.resize(size);
484
576
        for (int i = 0; i < size; ++i) {
485
363
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
363
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
363
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
363
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
363
            std::string_view lview(l_raw_str, l_str_size);
492
363
            std::string_view rview(r_raw_str, r_str_size);
493
494
363
            OP::execute(lview, rview, res[i]);
495
363
        }
496
213
        return Status::OK();
497
213
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
479
88
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
88
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
88
        auto size = loffsets.size();
483
88
        res.resize(size);
484
215
        for (int i = 0; i < size; ++i) {
485
127
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
127
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
127
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
127
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
127
            std::string_view lview(l_raw_str, l_str_size);
492
127
            std::string_view rview(r_raw_str, r_str_size);
493
494
127
            OP::execute(lview, rview, res[i]);
495
127
        }
496
88
        return Status::OK();
497
88
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
479
61
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
61
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
61
        auto size = loffsets.size();
483
61
        res.resize(size);
484
175
        for (int i = 0; i < size; ++i) {
485
114
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
114
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
114
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
114
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
114
            std::string_view lview(l_raw_str, l_str_size);
492
114
            std::string_view rview(r_raw_str, r_str_size);
493
494
114
            OP::execute(lview, rview, res[i]);
495
114
        }
496
61
        return Status::OK();
497
61
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
479
64
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
480
64
        DCHECK_EQ(loffsets.size(), roffsets.size());
481
482
64
        auto size = loffsets.size();
483
64
        res.resize(size);
484
186
        for (int i = 0; i < size; ++i) {
485
122
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
486
122
            int l_str_size = loffsets[i] - loffsets[i - 1];
487
488
122
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
489
122
            int r_str_size = roffsets[i] - roffsets[i - 1];
490
491
122
            std::string_view lview(l_raw_str, l_str_size);
492
122
            std::string_view rview(r_raw_str, r_str_size);
493
494
122
            OP::execute(lview, rview, res[i]);
495
122
        }
496
64
        return Status::OK();
497
64
    }
498
    static Status vector_scalar(const ColumnString::Chars& ldata,
499
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
500
34
                                ResultPaddedPODArray& res) {
501
34
        auto size = loffsets.size();
502
34
        res.resize(size);
503
34
        std::string_view rview(rdata.data, rdata.size);
504
68
        for (int i = 0; i < size; ++i) {
505
34
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
34
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
34
            std::string_view lview(l_raw_str, l_str_size);
508
509
34
            OP::execute(lview, rview, res[i]);
510
34
        }
511
34
        return Status::OK();
512
34
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
500
4
                                ResultPaddedPODArray& res) {
501
4
        auto size = loffsets.size();
502
4
        res.resize(size);
503
4
        std::string_view rview(rdata.data, rdata.size);
504
8
        for (int i = 0; i < size; ++i) {
505
4
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
4
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
4
            std::string_view lview(l_raw_str, l_str_size);
508
509
4
            OP::execute(lview, rview, res[i]);
510
4
        }
511
4
        return Status::OK();
512
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
500
14
                                ResultPaddedPODArray& res) {
501
14
        auto size = loffsets.size();
502
14
        res.resize(size);
503
14
        std::string_view rview(rdata.data, rdata.size);
504
28
        for (int i = 0; i < size; ++i) {
505
14
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
14
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
14
            std::string_view lview(l_raw_str, l_str_size);
508
509
14
            OP::execute(lview, rview, res[i]);
510
14
        }
511
14
        return Status::OK();
512
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
500
16
                                ResultPaddedPODArray& res) {
501
16
        auto size = loffsets.size();
502
16
        res.resize(size);
503
16
        std::string_view rview(rdata.data, rdata.size);
504
32
        for (int i = 0; i < size; ++i) {
505
16
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
506
16
            int l_str_size = loffsets[i] - loffsets[i - 1];
507
16
            std::string_view lview(l_raw_str, l_str_size);
508
509
16
            OP::execute(lview, rview, res[i]);
510
16
        }
511
16
        return Status::OK();
512
16
    }
513
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
514
44
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
44
        auto size = roffsets.size();
516
44
        res.resize(size);
517
44
        std::string_view lview(ldata.data, ldata.size);
518
94
        for (int i = 0; i < size; ++i) {
519
50
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
50
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
50
            std::string_view rview(r_raw_str, r_str_size);
522
523
50
            OP::execute(lview, rview, res[i]);
524
50
        }
525
44
        return Status::OK();
526
44
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
514
4
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
4
        auto size = roffsets.size();
516
4
        res.resize(size);
517
4
        std::string_view lview(ldata.data, ldata.size);
518
8
        for (int i = 0; i < size; ++i) {
519
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
4
            std::string_view rview(r_raw_str, r_str_size);
522
523
4
            OP::execute(lview, rview, res[i]);
524
4
        }
525
4
        return Status::OK();
526
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
514
14
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
14
        auto size = roffsets.size();
516
14
        res.resize(size);
517
14
        std::string_view lview(ldata.data, ldata.size);
518
28
        for (int i = 0; i < size; ++i) {
519
14
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
14
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
14
            std::string_view rview(r_raw_str, r_str_size);
522
523
14
            OP::execute(lview, rview, res[i]);
524
14
        }
525
14
        return Status::OK();
526
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERNS7_IiLm4096ESA_Lm16ELm15EEE
Line
Count
Source
514
26
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
515
26
        auto size = roffsets.size();
516
26
        res.resize(size);
517
26
        std::string_view lview(ldata.data, ldata.size);
518
58
        for (int i = 0; i < size; ++i) {
519
32
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
520
32
            int r_str_size = roffsets[i] - roffsets[i - 1];
521
32
            std::string_view rview(r_raw_str, r_str_size);
522
523
32
            OP::execute(lview, rview, res[i]);
524
32
        }
525
26
        return Status::OK();
526
26
    }
527
};
528
529
struct NameToLower {
530
    static constexpr auto name = "lower";
531
};
532
533
struct NameToUpper {
534
    static constexpr auto name = "upper";
535
};
536
537
template <typename OpName>
538
struct TransferImpl {
539
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
540
329
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
541
329
        size_t offset_size = offsets.size();
542
329
        if (UNLIKELY(!offset_size)) {
543
0
            return Status::OK();
544
0
        }
545
546
329
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
547
329
        res_offsets.resize(offset_size);
548
329
        if (is_ascii) {
549
269
            memcpy_small_allow_read_write_overflow15(
550
269
                    res_offsets.data(), offsets.data(),
551
269
                    offset_size * sizeof(ColumnString::Offsets::value_type));
552
553
269
            size_t data_length = data.size();
554
269
            res_data.resize(data_length);
555
269
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
556
85
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
557
184
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
558
184
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
559
184
            }
560
269
        } else {
561
60
            execute_utf8(data, offsets, res_data, res_offsets);
562
60
        }
563
564
329
        return Status::OK();
565
329
    }
_ZN5doris12TransferImplINS_11NameToLowerEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
540
205
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
541
205
        size_t offset_size = offsets.size();
542
205
        if (UNLIKELY(!offset_size)) {
543
0
            return Status::OK();
544
0
        }
545
546
205
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
547
205
        res_offsets.resize(offset_size);
548
205
        if (is_ascii) {
549
184
            memcpy_small_allow_read_write_overflow15(
550
184
                    res_offsets.data(), offsets.data(),
551
184
                    offset_size * sizeof(ColumnString::Offsets::value_type));
552
553
184
            size_t data_length = data.size();
554
184
            res_data.resize(data_length);
555
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
556
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
557
184
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
558
184
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
559
184
            }
560
184
        } else {
561
21
            execute_utf8(data, offsets, res_data, res_offsets);
562
21
        }
563
564
205
        return Status::OK();
565
205
    }
_ZN5doris12TransferImplINS_11NameToUpperEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
540
124
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
541
124
        size_t offset_size = offsets.size();
542
124
        if (UNLIKELY(!offset_size)) {
543
0
            return Status::OK();
544
0
        }
545
546
124
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
547
124
        res_offsets.resize(offset_size);
548
124
        if (is_ascii) {
549
85
            memcpy_small_allow_read_write_overflow15(
550
85
                    res_offsets.data(), offsets.data(),
551
85
                    offset_size * sizeof(ColumnString::Offsets::value_type));
552
553
85
            size_t data_length = data.size();
554
85
            res_data.resize(data_length);
555
85
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
556
85
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
557
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
558
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
559
            }
560
85
        } else {
561
39
            execute_utf8(data, offsets, res_data, res_offsets);
562
39
        }
563
564
124
        return Status::OK();
565
124
    }
566
567
    static void execute_utf8(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
568
60
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
569
60
        std::string result;
570
198
        for (int64_t i = 0; i < offsets.size(); ++i) {
571
138
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
572
138
            uint32_t size = offsets[i] - offsets[i - 1];
573
574
138
            result.clear();
575
138
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
576
91
                to_upper_utf8(begin, size, result);
577
91
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
578
47
                to_lower_utf8(begin, size, result);
579
47
            }
580
138
            StringOP::push_value_string(result, i, res_data, res_offsets);
581
138
        }
582
60
    }
_ZN5doris12TransferImplINS_11NameToLowerEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
568
21
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
569
21
        std::string result;
570
68
        for (int64_t i = 0; i < offsets.size(); ++i) {
571
47
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
572
47
            uint32_t size = offsets[i] - offsets[i - 1];
573
574
47
            result.clear();
575
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
576
                to_upper_utf8(begin, size, result);
577
47
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
578
47
                to_lower_utf8(begin, size, result);
579
47
            }
580
47
            StringOP::push_value_string(result, i, res_data, res_offsets);
581
47
        }
582
21
    }
_ZN5doris12TransferImplINS_11NameToUpperEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
568
39
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
569
39
        std::string result;
570
130
        for (int64_t i = 0; i < offsets.size(); ++i) {
571
91
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
572
91
            uint32_t size = offsets[i] - offsets[i - 1];
573
574
91
            result.clear();
575
91
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
576
91
                to_upper_utf8(begin, size, result);
577
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
578
                to_lower_utf8(begin, size, result);
579
            }
580
91
            StringOP::push_value_string(result, i, res_data, res_offsets);
581
91
        }
582
39
    }
583
584
91
    static void to_upper_utf8(const char* data, uint32_t size, std::string& result) {
585
91
        icu::StringPiece sp;
586
91
        sp.set(data, size);
587
91
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
588
91
        unicode_str.toUpper();
589
91
        unicode_str.toUTF8String(result);
590
91
    }
591
592
47
    static void to_lower_utf8(const char* data, uint32_t size, std::string& result) {
593
47
        icu::StringPiece sp;
594
47
        sp.set(data, size);
595
47
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
596
47
        unicode_str.toLower();
597
47
        unicode_str.toUTF8String(result);
598
47
    }
599
};
600
601
// Capitalize first letter
602
struct NameToInitcap {
603
    static constexpr auto name = "initcap";
604
};
605
606
struct InitcapImpl {
607
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
608
173
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
609
173
        res_offsets.resize(offsets.size());
610
611
173
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
612
173
        if (is_ascii) {
613
115
            impl_vectors_ascii(data, offsets, res_data, res_offsets);
614
115
        } else {
615
58
            impl_vectors_utf8(data, offsets, res_data, res_offsets);
616
58
        }
617
173
        return Status::OK();
618
173
    }
619
620
    static void impl_vectors_ascii(const ColumnString::Chars& data,
621
                                   const ColumnString::Offsets& offsets,
622
                                   ColumnString::Chars& res_data,
623
115
                                   ColumnString::Offsets& res_offsets) {
624
115
        size_t offset_size = offsets.size();
625
115
        memcpy_small_allow_read_write_overflow15(
626
115
                res_offsets.data(), offsets.data(),
627
115
                offset_size * sizeof(ColumnString::Offsets::value_type));
628
629
115
        size_t data_length = data.size();
630
115
        res_data.resize(data_length);
631
115
        simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
632
633
115
        bool need_capitalize = true;
634
247
        for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) {
635
132
            auto end_index = res_offsets[offset_index];
636
132
            need_capitalize = true;
637
638
1.56k
            for (size_t i = start_index; i < end_index; ++i) {
639
1.43k
                if (!::isalnum(res_data[i])) {
640
216
                    need_capitalize = true;
641
1.21k
                } else if (need_capitalize) {
642
                    /*
643
                    https://en.cppreference.com/w/cpp/string/byte/toupper
644
                    Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. 
645
                    To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char:
646
                    char my_toupper(char ch)
647
                    {
648
                        return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
649
                    }
650
                    */
651
267
                    res_data[i] = static_cast<unsigned char>(::toupper(res_data[i]));
652
267
                    need_capitalize = false;
653
267
                }
654
1.43k
            }
655
656
132
            start_index = end_index;
657
132
        }
658
115
    }
659
660
    static void impl_vectors_utf8(const ColumnString::Chars& data,
661
                                  const ColumnString::Offsets& offsets,
662
                                  ColumnString::Chars& res_data,
663
58
                                  ColumnString::Offsets& res_offsets) {
664
58
        std::string result;
665
123
        for (int64_t i = 0; i < offsets.size(); ++i) {
666
65
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
667
65
            uint32_t size = offsets[i] - offsets[i - 1];
668
65
            result.clear();
669
65
            to_initcap_utf8(begin, size, result);
670
65
            StringOP::push_value_string(result, i, res_data, res_offsets);
671
65
        }
672
58
    }
673
674
65
    static void to_initcap_utf8(const char* data, uint32_t size, std::string& result) {
675
65
        icu::StringPiece sp;
676
65
        sp.set(data, size);
677
65
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
678
65
        unicode_str.toLower();
679
65
        icu::UnicodeString output_str;
680
65
        bool need_capitalize = true;
681
65
        icu::StringCharacterIterator iter(unicode_str);
682
647
        for (UChar32 ch = iter.first32(); ch != icu::CharacterIterator::DONE; ch = iter.next32()) {
683
582
            if (!u_isalnum(ch)) {
684
105
                need_capitalize = true;
685
477
            } else if (need_capitalize) {
686
87
                ch = u_toupper(ch);
687
87
                need_capitalize = false;
688
87
            }
689
582
            output_str.append(ch);
690
582
        }
691
65
        output_str.toUTF8String(result);
692
65
    }
693
};
694
695
struct NameTrim {
696
    static constexpr auto name = "trim";
697
};
698
struct NameLTrim {
699
    static constexpr auto name = "ltrim";
700
};
701
struct NameRTrim {
702
    static constexpr auto name = "rtrim";
703
};
704
struct NameTrimIn {
705
    static constexpr auto name = "trim_in";
706
};
707
struct NameLTrimIn {
708
    static constexpr auto name = "ltrim_in";
709
};
710
struct NameRTrimIn {
711
    static constexpr auto name = "rtrim_in";
712
};
713
template <bool is_ltrim, bool is_rtrim, bool trim_single>
714
struct TrimUtil {
715
    static Status vector(const ColumnString::Chars& str_data,
716
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
717
300
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
300
        const size_t offset_size = str_offsets.size();
719
300
        res_offsets.resize(offset_size);
720
300
        res_data.reserve(str_data.size());
721
852
        for (size_t i = 0; i < offset_size; ++i) {
722
552
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
552
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
552
            if constexpr (is_ltrim) {
726
335
                str_begin =
727
335
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
335
            }
729
552
            if constexpr (is_rtrim) {
730
395
                str_end =
731
395
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
395
            }
733
734
552
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
552
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
552
        }
738
300
        return Status::OK();
739
300
    }
_ZN5doris8TrimUtilILb1ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
58
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
58
        const size_t offset_size = str_offsets.size();
719
58
        res_offsets.resize(offset_size);
720
58
        res_data.reserve(str_data.size());
721
178
        for (size_t i = 0; i < offset_size; ++i) {
722
120
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
120
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
120
            if constexpr (is_ltrim) {
726
120
                str_begin =
727
120
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
120
            }
729
120
            if constexpr (is_rtrim) {
730
120
                str_end =
731
120
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
120
            }
733
734
120
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
120
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
120
        }
738
58
        return Status::OK();
739
58
    }
_ZN5doris8TrimUtilILb1ELb0ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
52
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
52
        const size_t offset_size = str_offsets.size();
719
52
        res_offsets.resize(offset_size);
720
52
        res_data.reserve(str_data.size());
721
148
        for (size_t i = 0; i < offset_size; ++i) {
722
96
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
96
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
96
            if constexpr (is_ltrim) {
726
96
                str_begin =
727
96
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
96
            }
729
            if constexpr (is_rtrim) {
730
                str_end =
731
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
            }
733
734
96
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
96
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
96
        }
738
52
        return Status::OK();
739
52
    }
_ZN5doris8TrimUtilILb0ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
94
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
94
        const size_t offset_size = str_offsets.size();
719
94
        res_offsets.resize(offset_size);
720
94
        res_data.reserve(str_data.size());
721
266
        for (size_t i = 0; i < offset_size; ++i) {
722
172
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
172
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
            if constexpr (is_ltrim) {
726
                str_begin =
727
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
            }
729
172
            if constexpr (is_rtrim) {
730
172
                str_end =
731
172
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
172
            }
733
734
172
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
172
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
172
        }
738
94
        return Status::OK();
739
94
    }
_ZN5doris8TrimUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
24
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
24
        const size_t offset_size = str_offsets.size();
719
24
        res_offsets.resize(offset_size);
720
24
        res_data.reserve(str_data.size());
721
82
        for (size_t i = 0; i < offset_size; ++i) {
722
58
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
58
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
58
            if constexpr (is_ltrim) {
726
58
                str_begin =
727
58
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
58
            }
729
58
            if constexpr (is_rtrim) {
730
58
                str_end =
731
58
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
58
            }
733
734
58
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
58
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
58
        }
738
24
        return Status::OK();
739
24
    }
_ZN5doris8TrimUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
27
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
27
        const size_t offset_size = str_offsets.size();
719
27
        res_offsets.resize(offset_size);
720
27
        res_data.reserve(str_data.size());
721
88
        for (size_t i = 0; i < offset_size; ++i) {
722
61
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
61
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
61
            if constexpr (is_ltrim) {
726
61
                str_begin =
727
61
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
61
            }
729
            if constexpr (is_rtrim) {
730
                str_end =
731
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
            }
733
734
61
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
61
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
61
        }
738
27
        return Status::OK();
739
27
    }
_ZN5doris8TrimUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
717
45
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
718
45
        const size_t offset_size = str_offsets.size();
719
45
        res_offsets.resize(offset_size);
720
45
        res_data.reserve(str_data.size());
721
90
        for (size_t i = 0; i < offset_size; ++i) {
722
45
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
723
45
            const auto* str_end = str_data.data() + str_offsets[i];
724
725
            if constexpr (is_ltrim) {
726
                str_begin =
727
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
728
            }
729
45
            if constexpr (is_rtrim) {
730
45
                str_end =
731
45
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
732
45
            }
733
734
45
            res_data.insert_assume_reserved(str_begin, str_end);
735
            // The length of the result of the trim function will never exceed the length of the input.
736
45
            res_offsets[i] = (ColumnString::Offset)res_data.size();
737
45
        }
738
45
        return Status::OK();
739
45
    }
740
};
741
template <bool is_ltrim, bool is_rtrim, bool trim_single>
742
struct TrimInUtil {
743
    static Status vector(const ColumnString::Chars& str_data,
744
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
745
121
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
121
        const size_t offset_size = str_offsets.size();
747
121
        res_offsets.resize(offset_size);
748
121
        res_data.reserve(str_data.size());
749
121
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
121
                         simd::VStringFunctions::is_ascii(StringRef(
751
76
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
121
        if (all_ascii) {
754
68
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
68
        } else {
756
53
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
53
        }
758
121
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
745
43
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
43
        const size_t offset_size = str_offsets.size();
747
43
        res_offsets.resize(offset_size);
748
43
        res_data.reserve(str_data.size());
749
43
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
43
                         simd::VStringFunctions::is_ascii(StringRef(
751
28
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
43
        if (all_ascii) {
754
24
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
24
        } else {
756
19
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
19
        }
758
43
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
745
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
36
        const size_t offset_size = str_offsets.size();
747
36
        res_offsets.resize(offset_size);
748
36
        res_data.reserve(str_data.size());
749
36
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
36
                         simd::VStringFunctions::is_ascii(StringRef(
751
21
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
36
        if (all_ascii) {
754
19
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
19
        } else {
756
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
17
        }
758
36
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
745
42
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
746
42
        const size_t offset_size = str_offsets.size();
747
42
        res_offsets.resize(offset_size);
748
42
        res_data.reserve(str_data.size());
749
42
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
750
42
                         simd::VStringFunctions::is_ascii(StringRef(
751
27
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
752
753
42
        if (all_ascii) {
754
25
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
755
25
        } else {
756
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
757
17
        }
758
42
    }
759
760
private:
761
    static Status impl_vectors_ascii(const ColumnString::Chars& str_data,
762
                                     const ColumnString::Offsets& str_offsets,
763
                                     const StringRef& remove_str, ColumnString::Chars& res_data,
764
68
                                     ColumnString::Offsets& res_offsets) {
765
68
        const size_t offset_size = str_offsets.size();
766
68
        std::bitset<128> char_lookup;
767
68
        const char* remove_begin = remove_str.data;
768
68
        const char* remove_end = remove_str.data + remove_str.size;
769
770
251
        while (remove_begin < remove_end) {
771
183
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
183
            remove_begin += 1;
773
183
        }
774
775
136
        for (size_t i = 0; i < offset_size; ++i) {
776
68
            const char* str_begin =
777
68
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
68
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
68
            const char* left_trim_pos = str_begin;
780
68
            const char* right_trim_pos = str_end;
781
782
68
            if constexpr (is_ltrim) {
783
127
                while (left_trim_pos < str_end) {
784
114
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
30
                        break;
786
30
                    }
787
84
                    ++left_trim_pos;
788
84
                }
789
43
            }
790
791
68
            if constexpr (is_rtrim) {
792
114
                while (right_trim_pos > left_trim_pos) {
793
100
                    --right_trim_pos;
794
100
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
35
                        ++right_trim_pos;
796
35
                        break;
797
35
                    }
798
100
                }
799
49
            }
800
801
68
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
68
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
68
        }
805
806
68
        return Status::OK();
807
68
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
764
24
                                     ColumnString::Offsets& res_offsets) {
765
24
        const size_t offset_size = str_offsets.size();
766
24
        std::bitset<128> char_lookup;
767
24
        const char* remove_begin = remove_str.data;
768
24
        const char* remove_end = remove_str.data + remove_str.size;
769
770
86
        while (remove_begin < remove_end) {
771
62
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
62
            remove_begin += 1;
773
62
        }
774
775
48
        for (size_t i = 0; i < offset_size; ++i) {
776
24
            const char* str_begin =
777
24
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
24
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
24
            const char* left_trim_pos = str_begin;
780
24
            const char* right_trim_pos = str_end;
781
782
24
            if constexpr (is_ltrim) {
783
57
                while (left_trim_pos < str_end) {
784
50
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
17
                        break;
786
17
                    }
787
33
                    ++left_trim_pos;
788
33
                }
789
24
            }
790
791
24
            if constexpr (is_rtrim) {
792
39
                while (right_trim_pos > left_trim_pos) {
793
32
                    --right_trim_pos;
794
32
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
17
                        ++right_trim_pos;
796
17
                        break;
797
17
                    }
798
32
                }
799
24
            }
800
801
24
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
24
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
24
        }
805
806
24
        return Status::OK();
807
24
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
764
19
                                     ColumnString::Offsets& res_offsets) {
765
19
        const size_t offset_size = str_offsets.size();
766
19
        std::bitset<128> char_lookup;
767
19
        const char* remove_begin = remove_str.data;
768
19
        const char* remove_end = remove_str.data + remove_str.size;
769
770
73
        while (remove_begin < remove_end) {
771
54
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
54
            remove_begin += 1;
773
54
        }
774
775
38
        for (size_t i = 0; i < offset_size; ++i) {
776
19
            const char* str_begin =
777
19
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
19
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
19
            const char* left_trim_pos = str_begin;
780
19
            const char* right_trim_pos = str_end;
781
782
19
            if constexpr (is_ltrim) {
783
70
                while (left_trim_pos < str_end) {
784
64
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
13
                        break;
786
13
                    }
787
51
                    ++left_trim_pos;
788
51
                }
789
19
            }
790
791
            if constexpr (is_rtrim) {
792
                while (right_trim_pos > left_trim_pos) {
793
                    --right_trim_pos;
794
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
                        ++right_trim_pos;
796
                        break;
797
                    }
798
                }
799
            }
800
801
19
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
19
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
19
        }
805
806
19
        return Status::OK();
807
19
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
764
25
                                     ColumnString::Offsets& res_offsets) {
765
25
        const size_t offset_size = str_offsets.size();
766
25
        std::bitset<128> char_lookup;
767
25
        const char* remove_begin = remove_str.data;
768
25
        const char* remove_end = remove_str.data + remove_str.size;
769
770
92
        while (remove_begin < remove_end) {
771
67
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
772
67
            remove_begin += 1;
773
67
        }
774
775
50
        for (size_t i = 0; i < offset_size; ++i) {
776
25
            const char* str_begin =
777
25
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
778
25
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
779
25
            const char* left_trim_pos = str_begin;
780
25
            const char* right_trim_pos = str_end;
781
782
            if constexpr (is_ltrim) {
783
                while (left_trim_pos < str_end) {
784
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
785
                        break;
786
                    }
787
                    ++left_trim_pos;
788
                }
789
            }
790
791
25
            if constexpr (is_rtrim) {
792
75
                while (right_trim_pos > left_trim_pos) {
793
68
                    --right_trim_pos;
794
68
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
795
18
                        ++right_trim_pos;
796
18
                        break;
797
18
                    }
798
68
                }
799
25
            }
800
801
25
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
802
            // The length of the result of the trim function will never exceed the length of the input.
803
25
            res_offsets[i] = (ColumnString::Offset)res_data.size();
804
25
        }
805
806
25
        return Status::OK();
807
25
    }
808
809
    // Trims, for every row of the input string column, the leading (is_ltrim) and/or
    // trailing (is_rtrim) characters that occur in `remove_str`, comparing whole UTF-8
    // characters rather than single bytes.
    //
    // str_data / str_offsets: chars and end-offsets of the input column.
    // remove_str:             the set of characters to strip, given as one UTF-8 string.
    // res_data / res_offsets: output column; res_offsets is resized to the row count and
    //                         res_data is reserved to the input size up front.
    // Always returns Status::OK().
    static Status impl_vectors_utf8(const ColumnString::Chars& str_data,
                                    const ColumnString::Offsets& str_offsets,
                                    const StringRef& remove_str, ColumnString::Chars& res_data,
                                    ColumnString::Offsets& res_offsets) {
        const size_t offset_size = str_offsets.size();
        res_offsets.resize(offset_size);
        // A trimmed string is never longer than its input, so one reservation is enough
        // for every insert_assume_reserved() call below.
        res_data.reserve(str_data.size());

        // Each entry is a view over one character of remove_str; the views stay valid
        // because remove_str outlives this call.
        std::unordered_set<std::string_view> char_lookup;
        const char* remove_begin = remove_str.data;
        const char* remove_end = remove_str.data + remove_str.size;

        while (remove_begin < remove_end) {
            // Byte length of the next single character; the character count that the
            // helper also returns is not needed here.
            const size_t byte_len =
                    std::get<0>(simd::VStringFunctions::iterate_utf8_with_limit_length(
                            remove_begin, remove_end, 1));
            char_lookup.insert(std::string_view(remove_begin, byte_len));
            remove_begin += byte_len;
        }

        for (size_t i = 0; i < offset_size; ++i) {
            // NOTE(review): for i == 0 this reads str_offsets[i - 1]; presumably the offsets
            // container is padded so that index -1 yields 0 -- confirm against its definition.
            const char* str_begin =
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
            const char* left_trim_pos = str_begin;
            const char* right_trim_pos = str_end;

            if constexpr (is_ltrim) {
                while (left_trim_pos < str_end) {
                    const size_t byte_len =
                            std::get<0>(simd::VStringFunctions::iterate_utf8_with_limit_length(
                                    left_trim_pos, str_end, 1));
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
                        char_lookup.end()) {
                        break;
                    }
                    left_trim_pos += byte_len;
                }
            }

            if constexpr (is_rtrim) {
                while (right_trim_pos > left_trim_pos) {
                    // Step back over continuation bytes (10xxxxxx) to the lead byte of the
                    // last character. The scan is bounded by left_trim_pos so that malformed
                    // input (e.g. a value consisting only of continuation bytes) can neither
                    // read before the untrimmed region nor move right_trim_pos in front of
                    // left_trim_pos.
                    const char* prev_char_pos = right_trim_pos;
                    do {
                        --prev_char_pos;
                    } while (prev_char_pos > left_trim_pos && (*prev_char_pos & 0xC0) == 0x80);
                    const size_t byte_len = right_trim_pos - prev_char_pos;
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
                        char_lookup.end()) {
                        break;
                    }
                    right_trim_pos = prev_char_pos;
                }
            }

            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
            // The length of the result of the trim function will never exceed the length of the input.
            res_offsets[i] = (ColumnString::Offset)res_data.size();
        }
        return Status::OK();
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
812
19
                                    ColumnString::Offsets& res_offsets) {
813
19
        const size_t offset_size = str_offsets.size();
814
19
        res_offsets.resize(offset_size);
815
19
        res_data.reserve(str_data.size());
816
817
19
        std::unordered_set<std::string_view> char_lookup;
818
19
        const char* remove_begin = remove_str.data;
819
19
        const char* remove_end = remove_str.data + remove_str.size;
820
821
84
        while (remove_begin < remove_end) {
822
65
            size_t byte_len, char_len;
823
65
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
824
65
                    remove_begin, remove_end, 1);
825
65
            char_lookup.insert(std::string_view(remove_begin, byte_len));
826
65
            remove_begin += byte_len;
827
65
        }
828
829
52
        for (size_t i = 0; i < offset_size; ++i) {
830
33
            const char* str_begin =
831
33
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
832
33
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
833
33
            const char* left_trim_pos = str_begin;
834
33
            const char* right_trim_pos = str_end;
835
836
33
            if constexpr (is_ltrim) {
837
45
                while (left_trim_pos < str_end) {
838
41
                    size_t byte_len, char_len;
839
41
                    std::tie(byte_len, char_len) =
840
41
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
841
41
                                                                                   str_end, 1);
842
41
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
843
41
                        char_lookup.end()) {
844
29
                        break;
845
29
                    }
846
12
                    left_trim_pos += byte_len;
847
12
                }
848
33
            }
849
850
33
            if constexpr (is_rtrim) {
851
48
                while (right_trim_pos > left_trim_pos) {
852
44
                    const char* prev_char_pos = right_trim_pos;
853
90
                    do {
854
90
                        --prev_char_pos;
855
90
                    } while ((*prev_char_pos & 0xC0) == 0x80);
856
44
                    size_t byte_len = right_trim_pos - prev_char_pos;
857
44
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
858
44
                        char_lookup.end()) {
859
29
                        break;
860
29
                    }
861
15
                    right_trim_pos = prev_char_pos;
862
15
                }
863
33
            }
864
865
33
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
866
            // The length of the result of the trim function will never exceed the length of the input.
867
33
            res_offsets[i] = (ColumnString::Offset)res_data.size();
868
33
        }
869
19
        return Status::OK();
870
19
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
812
17
                                    ColumnString::Offsets& res_offsets) {
813
17
        const size_t offset_size = str_offsets.size();
814
17
        res_offsets.resize(offset_size);
815
17
        res_data.reserve(str_data.size());
816
817
17
        std::unordered_set<std::string_view> char_lookup;
818
17
        const char* remove_begin = remove_str.data;
819
17
        const char* remove_end = remove_str.data + remove_str.size;
820
821
78
        while (remove_begin < remove_end) {
822
61
            size_t byte_len, char_len;
823
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
824
61
                    remove_begin, remove_end, 1);
825
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
826
61
            remove_begin += byte_len;
827
61
        }
828
829
44
        for (size_t i = 0; i < offset_size; ++i) {
830
27
            const char* str_begin =
831
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
832
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
833
27
            const char* left_trim_pos = str_begin;
834
27
            const char* right_trim_pos = str_end;
835
836
27
            if constexpr (is_ltrim) {
837
36
                while (left_trim_pos < str_end) {
838
32
                    size_t byte_len, char_len;
839
32
                    std::tie(byte_len, char_len) =
840
32
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
841
32
                                                                                   str_end, 1);
842
32
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
843
32
                        char_lookup.end()) {
844
23
                        break;
845
23
                    }
846
9
                    left_trim_pos += byte_len;
847
9
                }
848
27
            }
849
850
            if constexpr (is_rtrim) {
851
                while (right_trim_pos > left_trim_pos) {
852
                    const char* prev_char_pos = right_trim_pos;
853
                    do {
854
                        --prev_char_pos;
855
                    } while ((*prev_char_pos & 0xC0) == 0x80);
856
                    size_t byte_len = right_trim_pos - prev_char_pos;
857
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
858
                        char_lookup.end()) {
859
                        break;
860
                    }
861
                    right_trim_pos = prev_char_pos;
862
                }
863
            }
864
865
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
866
            // The length of the result of the trim function will never exceed the length of the input.
867
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
868
27
        }
869
17
        return Status::OK();
870
17
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
812
17
                                    ColumnString::Offsets& res_offsets) {
813
17
        const size_t offset_size = str_offsets.size();
814
17
        res_offsets.resize(offset_size);
815
17
        res_data.reserve(str_data.size());
816
817
17
        std::unordered_set<std::string_view> char_lookup;
818
17
        const char* remove_begin = remove_str.data;
819
17
        const char* remove_end = remove_str.data + remove_str.size;
820
821
78
        while (remove_begin < remove_end) {
822
61
            size_t byte_len, char_len;
823
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
824
61
                    remove_begin, remove_end, 1);
825
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
826
61
            remove_begin += byte_len;
827
61
        }
828
829
44
        for (size_t i = 0; i < offset_size; ++i) {
830
27
            const char* str_begin =
831
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
832
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
833
27
            const char* left_trim_pos = str_begin;
834
27
            const char* right_trim_pos = str_end;
835
836
            if constexpr (is_ltrim) {
837
                while (left_trim_pos < str_end) {
838
                    size_t byte_len, char_len;
839
                    std::tie(byte_len, char_len) =
840
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
841
                                                                                   str_end, 1);
842
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
843
                        char_lookup.end()) {
844
                        break;
845
                    }
846
                    left_trim_pos += byte_len;
847
                }
848
            }
849
850
27
            if constexpr (is_rtrim) {
851
40
                while (right_trim_pos > left_trim_pos) {
852
36
                    const char* prev_char_pos = right_trim_pos;
853
66
                    do {
854
66
                        --prev_char_pos;
855
66
                    } while ((*prev_char_pos & 0xC0) == 0x80);
856
36
                    size_t byte_len = right_trim_pos - prev_char_pos;
857
36
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
858
36
                        char_lookup.end()) {
859
23
                        break;
860
23
                    }
861
13
                    right_trim_pos = prev_char_pos;
862
13
                }
863
27
            }
864
865
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
866
            // The length of the result of the trim function will never exceed the length of the input.
867
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
868
27
        }
869
17
        return Status::OK();
870
17
    }
871
};
872
// Implementation of the single-argument form of the Trim functions.
873
template <bool is_ltrim, bool is_rtrim, typename Name>
874
struct Trim1Impl {
875
    static constexpr auto name = Name::name;
876
877
157
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
877
45
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
877
35
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
877
41
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
877
9
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
877
13
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
877
14
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
878
879
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
880
137
                          uint32_t result, size_t input_rows_count) {
881
137
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
139
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
139
            auto col_res = ColumnString::create();
884
139
            char blank[] = " ";
885
139
            const StringRef remove_str(blank, 1);
886
139
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
139
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
139
                    col_res->get_offsets())));
889
139
            block.replace_by_position(result, std::move(col_res));
890
18.4E
        } else {
891
18.4E
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
18.4E
                                        block.get_by_position(arguments[0]).column->get_name(),
893
18.4E
                                        name);
894
18.4E
        }
895
139
        return Status::OK();
896
137
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
47
                          uint32_t result, size_t input_rows_count) {
881
47
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
48
            auto col_res = ColumnString::create();
884
48
            char blank[] = " ";
885
48
            const StringRef remove_str(blank, 1);
886
48
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
48
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
48
                    col_res->get_offsets())));
889
48
            block.replace_by_position(result, std::move(col_res));
890
18.4E
        } else {
891
18.4E
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
18.4E
                                        block.get_by_position(arguments[0]).column->get_name(),
893
18.4E
                                        name);
894
18.4E
        }
895
48
        return Status::OK();
896
47
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
37
                          uint32_t result, size_t input_rows_count) {
881
37
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
37
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
37
            auto col_res = ColumnString::create();
884
37
            char blank[] = " ";
885
37
            const StringRef remove_str(blank, 1);
886
37
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
37
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
37
                    col_res->get_offsets())));
889
37
            block.replace_by_position(result, std::move(col_res));
890
37
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
37
        return Status::OK();
896
37
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
41
                          uint32_t result, size_t input_rows_count) {
881
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
42
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
42
            auto col_res = ColumnString::create();
884
42
            char blank[] = " ";
885
42
            const StringRef remove_str(blank, 1);
886
42
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
42
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
42
                    col_res->get_offsets())));
889
42
            block.replace_by_position(result, std::move(col_res));
890
18.4E
        } else {
891
18.4E
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
18.4E
                                        block.get_by_position(arguments[0]).column->get_name(),
893
18.4E
                                        name);
894
18.4E
        }
895
42
        return Status::OK();
896
41
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
1
                          uint32_t result, size_t input_rows_count) {
881
1
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
1
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
1
            auto col_res = ColumnString::create();
884
1
            char blank[] = " ";
885
1
            const StringRef remove_str(blank, 1);
886
1
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
1
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
1
                    col_res->get_offsets())));
889
1
            block.replace_by_position(result, std::move(col_res));
890
1
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
1
        return Status::OK();
896
1
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
5
                          uint32_t result, size_t input_rows_count) {
881
5
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
5
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
5
            auto col_res = ColumnString::create();
884
5
            char blank[] = " ";
885
5
            const StringRef remove_str(blank, 1);
886
5
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
5
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
5
                    col_res->get_offsets())));
889
5
            block.replace_by_position(result, std::move(col_res));
890
5
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
5
        return Status::OK();
896
5
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
880
6
                          uint32_t result, size_t input_rows_count) {
881
6
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
882
6
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
883
6
            auto col_res = ColumnString::create();
884
6
            char blank[] = " ";
885
6
            const StringRef remove_str(blank, 1);
886
6
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
887
6
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
888
6
                    col_res->get_offsets())));
889
6
            block.replace_by_position(result, std::move(col_res));
890
6
        } else {
891
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
892
0
                                        block.get_by_position(arguments[0]).column->get_name(),
893
0
                                        name);
894
0
        }
895
6
        return Status::OK();
896
6
    }
897
};
898
899
// Implementation of the two-argument (string, string) form of the Trim functions.
900
template <bool is_ltrim, bool is_rtrim, typename Name>
901
struct Trim2Impl {
902
    static constexpr auto name = Name::name;
903
904
226
    static DataTypes get_variadic_argument_types() {
905
226
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
226
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
904
20
    static DataTypes get_variadic_argument_types() {
905
20
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
20
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
904
29
    static DataTypes get_variadic_argument_types() {
905
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
904
84
    static DataTypes get_variadic_argument_types() {
905
84
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
84
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
904
27
    static DataTypes get_variadic_argument_types() {
905
27
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
27
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
904
29
    static DataTypes get_variadic_argument_types() {
905
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
904
37
    static DataTypes get_variadic_argument_types() {
905
37
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
906
37
    }
907
908
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
909
281
                          uint32_t result, size_t input_rows_count) {
910
281
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
281
        const auto& rcol =
912
281
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
281
                        ->get_data_column_ptr();
914
282
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
282
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
282
                auto col_res = ColumnString::create();
917
282
                const auto* remove_str_raw = col_right->get_chars().data();
918
282
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
282
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
282
                if (remove_str.size == 1) {
922
65
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
65
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
65
                            col_res->get_offsets())));
925
217
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
121
                                  std::is_same<Name, NameRTrimIn>::value) {
929
121
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
121
                                col->get_chars(), col->get_offsets(), remove_str,
931
121
                                col_res->get_chars(), col_res->get_offsets())));
932
121
                    } else {
933
96
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
96
                                col->get_chars(), col->get_offsets(), remove_str,
935
96
                                col_res->get_chars(), col_res->get_offsets())));
936
96
                    }
937
217
                }
938
282
                block.replace_by_position(result, std::move(col_res));
939
282
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
18.4E
        } else {
946
18.4E
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
18.4E
                                        block.get_by_position(arguments[0]).column->get_name(),
948
18.4E
                                        name);
949
18.4E
        }
950
282
        return Status::OK();
951
281
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
25
                          uint32_t result, size_t input_rows_count) {
910
25
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
25
        const auto& rcol =
912
25
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
25
                        ->get_data_column_ptr();
914
26
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
26
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
26
                auto col_res = ColumnString::create();
917
26
                const auto* remove_str_raw = col_right->get_chars().data();
918
26
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
26
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
26
                if (remove_str.size == 1) {
922
2
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
2
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
2
                            col_res->get_offsets())));
925
24
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
                                  std::is_same<Name, NameRTrimIn>::value) {
929
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
                                col->get_chars(), col->get_offsets(), remove_str,
931
                                col_res->get_chars(), col_res->get_offsets())));
932
24
                    } else {
933
24
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
24
                                col->get_chars(), col->get_offsets(), remove_str,
935
24
                                col_res->get_chars(), col_res->get_offsets())));
936
24
                    }
937
24
                }
938
26
                block.replace_by_position(result, std::move(col_res));
939
26
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
18.4E
        } else {
946
18.4E
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
18.4E
                                        block.get_by_position(arguments[0]).column->get_name(),
948
18.4E
                                        name);
949
18.4E
        }
950
26
        return Status::OK();
951
25
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
32
                          uint32_t result, size_t input_rows_count) {
910
32
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
32
        const auto& rcol =
912
32
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
32
                        ->get_data_column_ptr();
914
32
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
32
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
32
                auto col_res = ColumnString::create();
917
32
                const auto* remove_str_raw = col_right->get_chars().data();
918
32
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
32
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
32
                if (remove_str.size == 1) {
922
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
5
                            col_res->get_offsets())));
925
27
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
                                  std::is_same<Name, NameRTrimIn>::value) {
929
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
                                col->get_chars(), col->get_offsets(), remove_str,
931
                                col_res->get_chars(), col_res->get_offsets())));
932
27
                    } else {
933
27
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
27
                                col->get_chars(), col->get_offsets(), remove_str,
935
27
                                col_res->get_chars(), col_res->get_offsets())));
936
27
                    }
937
27
                }
938
32
                block.replace_by_position(result, std::move(col_res));
939
32
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
32
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
32
        return Status::OK();
951
32
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
85
                          uint32_t result, size_t input_rows_count) {
910
85
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
85
        const auto& rcol =
912
85
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
85
                        ->get_data_column_ptr();
914
85
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
85
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
85
                auto col_res = ColumnString::create();
917
85
                const auto* remove_str_raw = col_right->get_chars().data();
918
85
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
85
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
85
                if (remove_str.size == 1) {
922
40
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
40
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
40
                            col_res->get_offsets())));
925
45
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
                                  std::is_same<Name, NameRTrimIn>::value) {
929
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
                                col->get_chars(), col->get_offsets(), remove_str,
931
                                col_res->get_chars(), col_res->get_offsets())));
932
45
                    } else {
933
45
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
45
                                col->get_chars(), col->get_offsets(), remove_str,
935
45
                                col_res->get_chars(), col_res->get_offsets())));
936
45
                    }
937
45
                }
938
85
                block.replace_by_position(result, std::move(col_res));
939
85
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
85
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
85
        return Status::OK();
951
85
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
50
                          uint32_t result, size_t input_rows_count) {
910
50
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
50
        const auto& rcol =
912
50
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
50
                        ->get_data_column_ptr();
914
50
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
50
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
50
                auto col_res = ColumnString::create();
917
50
                const auto* remove_str_raw = col_right->get_chars().data();
918
50
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
50
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
50
                if (remove_str.size == 1) {
922
7
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
7
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
7
                            col_res->get_offsets())));
925
43
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
43
                                  std::is_same<Name, NameRTrimIn>::value) {
929
43
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
43
                                col->get_chars(), col->get_offsets(), remove_str,
931
43
                                col_res->get_chars(), col_res->get_offsets())));
932
                    } else {
933
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
                                col->get_chars(), col->get_offsets(), remove_str,
935
                                col_res->get_chars(), col_res->get_offsets())));
936
                    }
937
43
                }
938
50
                block.replace_by_position(result, std::move(col_res));
939
50
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
50
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
50
        return Status::OK();
951
50
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
41
                          uint32_t result, size_t input_rows_count) {
910
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
41
        const auto& rcol =
912
41
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
41
                        ->get_data_column_ptr();
914
41
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
41
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
41
                auto col_res = ColumnString::create();
917
41
                const auto* remove_str_raw = col_right->get_chars().data();
918
41
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
41
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
41
                if (remove_str.size == 1) {
922
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
5
                            col_res->get_offsets())));
925
36
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
36
                                  std::is_same<Name, NameRTrimIn>::value) {
929
36
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
36
                                col->get_chars(), col->get_offsets(), remove_str,
931
36
                                col_res->get_chars(), col_res->get_offsets())));
932
                    } else {
933
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
                                col->get_chars(), col->get_offsets(), remove_str,
935
                                col_res->get_chars(), col_res->get_offsets())));
936
                    }
937
36
                }
938
41
                block.replace_by_position(result, std::move(col_res));
939
41
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
41
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
41
        return Status::OK();
951
41
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
909
48
                          uint32_t result, size_t input_rows_count) {
910
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
911
48
        const auto& rcol =
912
48
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
913
48
                        ->get_data_column_ptr();
914
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
915
48
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
916
48
                auto col_res = ColumnString::create();
917
48
                const auto* remove_str_raw = col_right->get_chars().data();
918
48
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
919
48
                const StringRef remove_str(remove_str_raw, remove_str_size);
920
921
48
                if (remove_str.size == 1) {
922
6
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
923
6
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
924
6
                            col_res->get_offsets())));
925
42
                } else {
926
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
927
                                  std::is_same<Name, NameLTrimIn>::value ||
928
42
                                  std::is_same<Name, NameRTrimIn>::value) {
929
42
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
930
42
                                col->get_chars(), col->get_offsets(), remove_str,
931
42
                                col_res->get_chars(), col_res->get_offsets())));
932
                    } else {
933
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
934
                                col->get_chars(), col->get_offsets(), remove_str,
935
                                col_res->get_chars(), col_res->get_offsets())));
936
                    }
937
42
                }
938
48
                block.replace_by_position(result, std::move(col_res));
939
48
            } else {
940
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
941
0
                                            block.get_by_position(arguments[1]).column->get_name(),
942
0
                                            name);
943
0
            }
944
945
48
        } else {
946
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
947
0
                                        block.get_by_position(arguments[0]).column->get_name(),
948
0
                                        name);
949
0
        }
950
48
        return Status::OK();
951
48
    }
952
};
953
954
template <typename impl>
955
class FunctionTrim : public IFunction {
956
public:
957
    static constexpr auto name = impl::name;
958
395
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
958
46
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
958
36
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
958
42
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
958
21
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
958
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
958
85
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
958
10
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
958
14
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
958
15
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
958
28
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
958
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
958
38
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
959
12
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
959
1
    String get_name() const override { return impl::name; }
960
961
287
    size_t get_number_of_arguments() const override {
962
287
        return get_variadic_argument_types_impl().size();
963
287
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
37
    size_t get_number_of_arguments() const override {
962
37
        return get_variadic_argument_types_impl().size();
963
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
27
    size_t get_number_of_arguments() const override {
962
27
        return get_variadic_argument_types_impl().size();
963
27
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
33
    size_t get_number_of_arguments() const override {
962
33
        return get_variadic_argument_types_impl().size();
963
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
12
    size_t get_number_of_arguments() const override {
962
12
        return get_variadic_argument_types_impl().size();
963
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
21
    size_t get_number_of_arguments() const override {
962
21
        return get_variadic_argument_types_impl().size();
963
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
961
76
    size_t get_number_of_arguments() const override {
962
76
        return get_variadic_argument_types_impl().size();
963
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
1
    size_t get_number_of_arguments() const override {
962
1
        return get_variadic_argument_types_impl().size();
963
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
5
    size_t get_number_of_arguments() const override {
962
5
        return get_variadic_argument_types_impl().size();
963
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
6
    size_t get_number_of_arguments() const override {
962
6
        return get_variadic_argument_types_impl().size();
963
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
19
    size_t get_number_of_arguments() const override {
962
19
        return get_variadic_argument_types_impl().size();
963
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
21
    size_t get_number_of_arguments() const override {
962
21
        return get_variadic_argument_types_impl().size();
963
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
961
29
    size_t get_number_of_arguments() const override {
962
29
        return get_variadic_argument_types_impl().size();
963
29
    }
964
965
287
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
287
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
287
        return arguments[0];
972
287
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
37
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
37
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
37
        return arguments[0];
972
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
27
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
27
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
27
        return arguments[0];
972
27
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
33
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
33
        return arguments[0];
972
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
12
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
12
        return arguments[0];
972
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
21
        return arguments[0];
972
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
76
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
76
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
76
        return arguments[0];
972
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
1
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
1
        return arguments[0];
972
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
5
        return arguments[0];
972
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
6
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
6
        return arguments[0];
972
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
19
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
19
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
19
        return arguments[0];
972
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
21
        return arguments[0];
972
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
965
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
966
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
967
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
968
0
                                   "Illegal type {} of argument of function {}",
969
0
                                   arguments[0]->get_name(), get_name());
970
0
        }
971
29
        return arguments[0];
972
29
    }
973
    // Reports argument index 1 (the second parameter of "trim") as always constant.
974
570
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
85
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
58
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
64
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
37
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
41
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
96
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
1
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
5
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
6
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
67
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
51
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
974
59
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
975
976
383
    /// Returns the variadic argument types by forwarding to
    /// impl::get_variadic_argument_types().
    DataTypes get_variadic_argument_types_impl() const override {
977
383
        return impl::get_variadic_argument_types();
978
383
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
45
    DataTypes get_variadic_argument_types_impl() const override {
977
45
        return impl::get_variadic_argument_types();
978
45
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
35
    DataTypes get_variadic_argument_types_impl() const override {
977
35
        return impl::get_variadic_argument_types();
978
35
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
41
    DataTypes get_variadic_argument_types_impl() const override {
977
41
        return impl::get_variadic_argument_types();
978
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
20
    DataTypes get_variadic_argument_types_impl() const override {
977
20
        return impl::get_variadic_argument_types();
978
20
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
29
    DataTypes get_variadic_argument_types_impl() const override {
977
29
        return impl::get_variadic_argument_types();
978
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
84
    DataTypes get_variadic_argument_types_impl() const override {
977
84
        return impl::get_variadic_argument_types();
978
84
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
9
    DataTypes get_variadic_argument_types_impl() const override {
977
9
        return impl::get_variadic_argument_types();
978
9
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
13
    DataTypes get_variadic_argument_types_impl() const override {
977
13
        return impl::get_variadic_argument_types();
978
13
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
14
    DataTypes get_variadic_argument_types_impl() const override {
977
14
        return impl::get_variadic_argument_types();
978
14
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
27
    DataTypes get_variadic_argument_types_impl() const override {
977
27
        return impl::get_variadic_argument_types();
978
27
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
29
    DataTypes get_variadic_argument_types_impl() const override {
977
29
        return impl::get_variadic_argument_types();
978
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
976
37
    DataTypes get_variadic_argument_types_impl() const override {
977
37
        return impl::get_variadic_argument_types();
978
37
    }
979
980
    /// Executes the function by forwarding every parameter, unchanged, to
    /// impl::execute() and returning its Status.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
981
421
                        uint32_t result, size_t input_rows_count) const override {
982
421
        return impl::execute(context, block, arguments, result, input_rows_count);
983
421
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
48
                        uint32_t result, size_t input_rows_count) const override {
982
48
        return impl::execute(context, block, arguments, result, input_rows_count);
983
48
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
37
                        uint32_t result, size_t input_rows_count) const override {
982
37
        return impl::execute(context, block, arguments, result, input_rows_count);
983
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
42
                        uint32_t result, size_t input_rows_count) const override {
982
42
        return impl::execute(context, block, arguments, result, input_rows_count);
983
42
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
26
                        uint32_t result, size_t input_rows_count) const override {
982
26
        return impl::execute(context, block, arguments, result, input_rows_count);
983
26
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
32
                        uint32_t result, size_t input_rows_count) const override {
982
32
        return impl::execute(context, block, arguments, result, input_rows_count);
983
32
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
85
                        uint32_t result, size_t input_rows_count) const override {
982
85
        return impl::execute(context, block, arguments, result, input_rows_count);
983
85
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
1
                        uint32_t result, size_t input_rows_count) const override {
982
1
        return impl::execute(context, block, arguments, result, input_rows_count);
983
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
5
                        uint32_t result, size_t input_rows_count) const override {
982
5
        return impl::execute(context, block, arguments, result, input_rows_count);
983
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
6
                        uint32_t result, size_t input_rows_count) const override {
982
6
        return impl::execute(context, block, arguments, result, input_rows_count);
983
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
50
                        uint32_t result, size_t input_rows_count) const override {
982
50
        return impl::execute(context, block, arguments, result, input_rows_count);
983
50
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
41
                        uint32_t result, size_t input_rows_count) const override {
982
41
        return impl::execute(context, block, arguments, result, input_rows_count);
983
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
981
48
                        uint32_t result, size_t input_rows_count) const override {
982
48
        return impl::execute(context, block, arguments, result, input_rows_count);
983
48
    }
984
};
985
986
/// Tag type exposing the function name "unhex" through its `name` member.
struct UnHexImplEmpty {
    static constexpr const char* name = "unhex";
};
989
990
/// Tag type exposing the function name "unhex_null" through its `name` member.
struct UnHexImplNull {
    static constexpr const char* name = "unhex_null";
};
993
994
template <typename Name>
995
struct UnHexImpl {
996
    static constexpr auto name = Name::name;
997
    using ReturnType = DataTypeString;
998
    using ColumnType = ColumnString;
999
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1000
1001
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1002
160
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1003
160
        auto rows_count = offsets.size();
1004
160
        dst_offsets.resize(rows_count);
1005
1006
160
        int64_t total_size = 0;
1007
368
        for (size_t i = 0; i < rows_count; i++) {
1008
208
            size_t len = offsets[i] - offsets[i - 1];
1009
208
            total_size += len / 2;
1010
208
        }
1011
160
        ColumnString::check_chars_length(total_size, rows_count);
1012
160
        dst_data.resize(total_size);
1013
160
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1014
160
        size_t offset = 0;
1015
1016
368
        for (int i = 0; i < rows_count; ++i) {
1017
208
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1018
208
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1019
1020
208
            if (UNLIKELY(srclen == 0)) {
1021
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1022
13
                continue;
1023
13
            }
1024
1025
195
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1026
1027
195
            offset += outlen;
1028
195
            dst_offsets[i] = cast_set<uint32_t>(offset);
1029
195
        }
1030
160
        dst_data.pop_back(total_size - offset);
1031
160
        return Status::OK();
1032
160
    }
1033
1034
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1035
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1036
33
                         ColumnUInt8::Container* null_map_data) {
1037
33
        auto rows_count = offsets.size();
1038
33
        dst_offsets.resize(rows_count);
1039
1040
33
        int64_t total_size = 0;
1041
84
        for (size_t i = 0; i < rows_count; i++) {
1042
51
            size_t len = offsets[i] - offsets[i - 1];
1043
51
            total_size += len / 2;
1044
51
        }
1045
33
        ColumnString::check_chars_length(total_size, rows_count);
1046
33
        dst_data.resize(total_size);
1047
33
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1048
33
        size_t offset = 0;
1049
1050
84
        for (int i = 0; i < rows_count; ++i) {
1051
51
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1052
51
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1053
1054
51
            if (UNLIKELY(srclen == 0)) {
1055
7
                (*null_map_data)[i] = 1;
1056
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1057
7
                continue;
1058
7
            }
1059
1060
44
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1061
1062
44
            if (outlen == 0) {
1063
13
                (*null_map_data)[i] = 1;
1064
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1065
13
                continue;
1066
13
            }
1067
1068
31
            offset += outlen;
1069
31
            dst_offsets[i] = cast_set<uint32_t>(offset);
1070
31
        }
1071
33
        dst_data.pop_back(total_size - offset);
1072
33
        return Status::OK();
1073
33
    }
1074
};
1075
1076
/// Tag type exposing the function name "space" through its `name` member.
struct NameStringSpace {
    static constexpr const char* name = "space";
};
1079
1080
struct StringSpace {
1081
    using ReturnType = DataTypeString;
1082
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_INT;
1083
    using Type = Int32;
1084
    using ReturnColumnType = ColumnString;
1085
1086
    static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data,
1087
10
                         ColumnString::Offsets& res_offsets) {
1088
10
        res_offsets.resize(data.size());
1089
10
        size_t input_size = res_offsets.size();
1090
10
        int64_t total_size = 0;
1091
34
        for (size_t i = 0; i < input_size; ++i) {
1092
24
            if (data[i] > 0) {
1093
14
                total_size += data[i];
1094
14
            }
1095
24
        }
1096
10
        ColumnString::check_chars_length(total_size, input_size);
1097
10
        res_data.reserve(total_size);
1098
1099
34
        for (size_t i = 0; i < input_size; ++i) {
1100
24
            if (data[i] > 0) [[likely]] {
1101
14
                res_data.resize_fill(res_data.size() + data[i], ' ');
1102
14
                cast_set(res_offsets[i], res_data.size());
1103
14
            } else {
1104
10
                StringOP::push_empty_string(i, res_data, res_offsets);
1105
10
            }
1106
24
        }
1107
10
        return Status::OK();
1108
10
    }
1109
};
1110
1111
struct ToBase64Impl {
1112
    static constexpr auto name = "to_base64";
1113
    using ReturnType = DataTypeString;
1114
    using ColumnType = ColumnString;
1115
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1116
1117
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1118
107
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1119
107
        auto rows_count = offsets.size();
1120
107
        dst_offsets.resize(rows_count);
1121
1122
107
        size_t total_size = 0;
1123
250
        for (size_t i = 0; i < rows_count; i++) {
1124
143
            size_t len = offsets[i] - offsets[i - 1];
1125
143
            total_size += 4 * ((len + 2) / 3);
1126
143
        }
1127
107
        ColumnString::check_chars_length(total_size, rows_count);
1128
107
        dst_data.resize(total_size);
1129
107
        auto* dst_data_ptr = dst_data.data();
1130
107
        size_t offset = 0;
1131
1132
250
        for (int i = 0; i < rows_count; ++i) {
1133
143
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1134
143
            size_t srclen = offsets[i] - offsets[i - 1];
1135
1136
143
            if (UNLIKELY(srclen == 0)) {
1137
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1138
7
                continue;
1139
7
            }
1140
1141
136
            auto outlen = doris::base64_encode((const unsigned char*)source, srclen,
1142
136
                                               (unsigned char*)(dst_data_ptr + offset));
1143
1144
136
            offset += outlen;
1145
136
            dst_offsets[i] = cast_set<uint32_t>(offset);
1146
136
        }
1147
107
        dst_data.pop_back(total_size - offset);
1148
107
        return Status::OK();
1149
107
    }
1150
};
1151
1152
struct FromBase64Impl {
1153
    static constexpr auto name = "from_base64";
1154
    using ReturnType = DataTypeString;
1155
    using ColumnType = ColumnString;
1156
1157
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1158
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1159
109
                         NullMap& null_map) {
1160
109
        auto rows_count = offsets.size();
1161
109
        dst_offsets.resize(rows_count);
1162
1163
109
        size_t total_size = 0;
1164
271
        for (size_t i = 0; i < rows_count; i++) {
1165
162
            auto len = offsets[i] - offsets[i - 1];
1166
162
            total_size += len / 4 * 3;
1167
162
        }
1168
109
        ColumnString::check_chars_length(total_size, rows_count);
1169
109
        dst_data.resize(total_size);
1170
109
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1171
109
        size_t offset = 0;
1172
1173
271
        for (int i = 0; i < rows_count; ++i) {
1174
162
            if (UNLIKELY(null_map[i])) {
1175
0
                null_map[i] = 1;
1176
0
                dst_offsets[i] = cast_set<uint32_t>(offset);
1177
0
                continue;
1178
0
            }
1179
1180
162
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1181
162
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1182
1183
162
            if (UNLIKELY(srclen == 0)) {
1184
6
                dst_offsets[i] = cast_set<uint32_t>(offset);
1185
6
                continue;
1186
6
            }
1187
1188
156
            auto outlen = base64_decode(source, srclen, dst_data_ptr + offset);
1189
1190
156
            if (outlen < 0) {
1191
60
                null_map[i] = 1;
1192
60
                dst_offsets[i] = cast_set<uint32_t>(offset);
1193
96
            } else {
1194
96
                offset += outlen;
1195
96
                dst_offsets[i] = cast_set<uint32_t>(offset);
1196
96
            }
1197
156
        }
1198
109
        dst_data.pop_back(total_size - offset);
1199
109
        return Status::OK();
1200
109
    }
1201
};
1202
1203
struct StringAppendTrailingCharIfAbsent {
1204
    static constexpr auto name = "append_trailing_char_if_absent";
1205
    using Chars = ColumnString::Chars;
1206
    using Offsets = ColumnString::Offsets;
1207
    using ReturnType = DataTypeString;
1208
    using ColumnType = ColumnString;
1209
1210
48
    static bool str_end_with(const StringRef& str, const StringRef& end) {
1211
48
        if (str.size < end.size) {
1212
11
            return false;
1213
11
        }
1214
        // The end_with method of StringRef needs to ensure that the size of end is less than or equal to the size of str.
1215
37
        return str.end_with(end);
1216
48
    }
1217
1218
    static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1219
                              const Chars& rdata, const Offsets& roffsets, Chars& res_data,
1220
56
                              Offsets& res_offsets, NullMap& null_map_data) {
1221
56
        DCHECK_EQ(loffsets.size(), roffsets.size());
1222
56
        size_t input_rows_count = loffsets.size();
1223
56
        res_offsets.resize(input_rows_count);
1224
56
        fmt::memory_buffer buffer;
1225
1226
158
        for (size_t i = 0; i < input_rows_count; ++i) {
1227
102
            buffer.clear();
1228
1229
102
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1230
102
                                       loffsets[i] - loffsets[i - 1]);
1231
102
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1232
102
                                       roffsets[i] - roffsets[i - 1]);
1233
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1234
102
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1235
102
                    rstr.begin(), rstr.end(), 2);
1236
1237
102
            if (char_len != 1) {
1238
66
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1239
66
                continue;
1240
66
            }
1241
36
            if (str_end_with(lstr, rstr)) {
1242
9
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1243
9
                continue;
1244
9
            }
1245
1246
27
            buffer.append(lstr.begin(), lstr.end());
1247
27
            buffer.append(rstr.begin(), rstr.end());
1248
27
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1249
27
                                        res_offsets);
1250
27
        }
1251
56
    }
1252
    static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1253
                              const StringRef& rstr, Chars& res_data, Offsets& res_offsets,
1254
8
                              NullMap& null_map_data) {
1255
8
        size_t input_rows_count = loffsets.size();
1256
8
        res_offsets.resize(input_rows_count);
1257
8
        fmt::memory_buffer buffer;
1258
        // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1259
8
        auto [byte_len, char_len] =
1260
8
                simd::VStringFunctions::iterate_utf8_with_limit_length(rstr.begin(), rstr.end(), 2);
1261
8
        if (char_len != 1) {
1262
4
            for (size_t i = 0; i < input_rows_count; ++i) {
1263
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1264
2
            }
1265
2
            return;
1266
2
        }
1267
1268
12
        for (size_t i = 0; i < input_rows_count; ++i) {
1269
6
            buffer.clear();
1270
6
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1271
6
                                       loffsets[i] - loffsets[i - 1]);
1272
1273
6
            if (str_end_with(lstr, rstr)) {
1274
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1275
2
                continue;
1276
2
            }
1277
1278
4
            buffer.append(lstr.begin(), lstr.end());
1279
4
            buffer.append(rstr.begin(), rstr.end());
1280
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1281
4
                                        res_offsets);
1282
4
        }
1283
6
    }
1284
    static void scalar_vector(FunctionContext* context, const StringRef& lstr, const Chars& rdata,
1285
                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
1286
8
                              NullMap& null_map_data) {
1287
8
        size_t input_rows_count = roffsets.size();
1288
8
        res_offsets.resize(input_rows_count);
1289
8
        fmt::memory_buffer buffer;
1290
1291
16
        for (size_t i = 0; i < input_rows_count; ++i) {
1292
8
            buffer.clear();
1293
1294
8
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1295
8
                                       roffsets[i] - roffsets[i - 1]);
1296
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1297
8
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1298
8
                    rstr.begin(), rstr.end(), 2);
1299
1300
8
            if (char_len != 1) {
1301
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1302
2
                continue;
1303
2
            }
1304
6
            if (str_end_with(lstr, rstr)) {
1305
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1306
2
                continue;
1307
2
            }
1308
1309
4
            buffer.append(lstr.begin(), lstr.end());
1310
4
            buffer.append(rstr.begin(), rstr.end());
1311
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1312
4
                                        res_offsets);
1313
4
        }
1314
8
    }
1315
};
1316
1317
/// Tag type for "lpad": exposes the function name and sets `is_lpad` to true.
struct StringLPad {
    static constexpr const char* name = "lpad";
    static constexpr bool is_lpad = true;
};
1321
1322
/// Compile-time traits for the "rpad" variant: the function name plus a flag marking
/// it as not the left-padding flavour. Used as the template argument of FunctionStringPad.
struct StringRPad {
    static constexpr const char* name = "rpad";
    static constexpr bool is_lpad = false;
};
1326
1327
template <typename LeftDataType, typename RightDataType>
1328
using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;
1329
1330
template <typename LeftDataType, typename RightDataType>
1331
using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;
1332
1333
template <typename LeftDataType, typename RightDataType>
1334
using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
1335
1336
// Function types ready for registration
1337
using FunctionStringParseDataSize = FunctionUnaryToType<ParseDataSize, NameParseDataSize>;
1338
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
1339
using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLength>;
1340
using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
1341
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
1342
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
1343
using FunctionIsValidUTF8 = FunctionUnaryToType<IsValidUTF8Impl, NameIsValidUTF8>;
1344
using FunctionStringStartsWith =
1345
        FunctionBinaryToType<DataTypeString, DataTypeString, StringStartsWithImpl, NameStartsWith>;
1346
using FunctionStringEndsWith =
1347
        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
1348
using FunctionStringInstr =
1349
        FunctionBinaryToType<DataTypeString, DataTypeString, StringInStrImpl, NameInstr>;
1350
using FunctionStringLocate =
1351
        FunctionBinaryToType<DataTypeString, DataTypeString, StringLocateImpl, NameLocate>;
1352
using FunctionStringFindInSet =
1353
        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;
1354
1355
using FunctionQuote = FunctionStringToString<NameQuoteImpl, NameQuote>;
1356
1357
using FunctionToLower = FunctionStringToString<TransferImpl<NameToLower>, NameToLower>;
1358
1359
using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameToUpper>;
1360
1361
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
1362
1363
using FunctionUnHex = FunctionStringEncode<UnHexImpl<UnHexImplEmpty>, false>;
1364
using FunctionUnHexNullable = FunctionStringEncode<UnHexImpl<UnHexImplNull>, true>;
1365
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>;
1366
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
1367
1368
using FunctionStringAppendTrailingCharIfAbsent =
1369
        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;
1370
1371
using FunctionStringLPad = FunctionStringPad<StringLPad>;
1372
using FunctionStringRPad = FunctionStringPad<StringRPad>;
1373
1374
extern void register_function_string_basic(SimpleFunctionFactory& factory);
1375
extern void register_function_string_digest(SimpleFunctionFactory& factory);
1376
extern void register_function_string_mask(SimpleFunctionFactory& factory);
1377
extern void register_function_string_misc(SimpleFunctionFactory& factory);
1378
extern void register_function_string_search(SimpleFunctionFactory& factory);
1379
extern void register_function_string_url(SimpleFunctionFactory& factory);
1380
1381
8
void register_function_string(SimpleFunctionFactory& factory) {
1382
8
    register_function_string_basic(factory);
1383
8
    register_function_string_digest(factory);
1384
8
    register_function_string_mask(factory);
1385
8
    register_function_string_misc(factory);
1386
8
    register_function_string_search(factory);
1387
8
    register_function_string_url(factory);
1388
1389
8
    factory.register_function<FunctionStringParseDataSize>();
1390
8
    factory.register_function<FunctionStringASCII>();
1391
8
    factory.register_function<FunctionStringLength>();
1392
8
    factory.register_function<FunctionCrc32>();
1393
8
    factory.register_function<FunctionStringUTF8Length>();
1394
8
    factory.register_function<FunctionStringSpace>();
1395
8
    factory.register_function<FunctionStringStartsWith>();
1396
8
    factory.register_function<FunctionStringEndsWith>();
1397
8
    factory.register_function<FunctionStringInstr>();
1398
8
    factory.register_function<FunctionStringFindInSet>();
1399
8
    factory.register_function<FunctionStringLocate>();
1400
8
    factory.register_function<FunctionQuote>();
1401
8
    factory.register_function<FunctionReverseCommon>();
1402
8
    factory.register_function<FunctionUnHex>();
1403
8
    factory.register_function<FunctionUnHexNullable>();
1404
8
    factory.register_function<FunctionToLower>();
1405
8
    factory.register_function<FunctionToUpper>();
1406
8
    factory.register_function<FunctionToInitcap>();
1407
8
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrim>>>();
1408
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrim>>>();
1409
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrim>>>();
1410
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrim>>>();
1411
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrim>>>();
1412
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrim>>>();
1413
8
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrimIn>>>();
1414
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrimIn>>>();
1415
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrimIn>>>();
1416
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrimIn>>>();
1417
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrimIn>>>();
1418
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrimIn>>>();
1419
8
    factory.register_function<FunctionStringConcat>();
1420
8
    factory.register_function<FunctionStringElt>();
1421
8
    factory.register_function<FunctionStringConcatWs>();
1422
8
    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
1423
8
    factory.register_function<FunctionStringRepeat>();
1424
8
    factory.register_function<FunctionStringLPad>();
1425
8
    factory.register_function<FunctionStringRPad>();
1426
8
    factory.register_function<FunctionToBase64>();
1427
8
    factory.register_function<FunctionFromBase64>();
1428
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
1429
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
1430
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
1431
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMALV2>>>();
1432
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL32>>>();
1433
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL64>>>();
1434
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL128I>>>();
1435
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL256>>>();
1436
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDoubleImpl>>();
1437
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt64Impl>>();
1438
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt128Impl>>();
1439
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMALV2>>>();
1440
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL32>>>();
1441
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL64>>>();
1442
8
    factory.register_function<
1443
8
            FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL128I>>>();
1444
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL256>>>();
1445
8
    factory.register_function<FunctionReplace<ReplaceImpl, true>>();
1446
8
    factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
1447
8
    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
1448
8
    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
1449
8
    factory.register_function<FunctionOverlay>();
1450
8
    factory.register_function<FunctionIsValidUTF8>();
1451
1452
8
    factory.register_alias(FunctionIsValidUTF8::name, "isValidUTF8");
1453
8
    factory.register_alias(FunctionToLower::name, "lcase");
1454
8
    factory.register_alias(FunctionToUpper::name, "ucase");
1455
8
    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
1456
8
    factory.register_alias(FunctionStringLength::name, "octet_length");
1457
8
    factory.register_alias(FunctionOverlay::name, "insert");
1458
8
}
1459
1460
} // namespace doris