Coverage Report

Created: 2026-05-19 18:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <ctype.h>
19
#include <math.h>
20
#include <re2/stringpiece.h>
21
#include <unicode/schriter.h>
22
#include <unicode/uchar.h>
23
#include <unicode/unistr.h>
24
#include <unicode/ustream.h>
25
26
#include <bitset>
27
#include <cstddef>
28
#include <cstdint>
29
#include <string_view>
30
31
#include "common/cast_set.h"
32
#include "common/status.h"
33
#include "core/column/column.h"
34
#include "core/column/column_string.h"
35
#include "core/pod_array_fwd.h"
36
#include "core/string_ref.h"
37
#include "exprs/function/function_reverse.h"
38
#include "exprs/function/function_string_concat.h"
39
#include "exprs/function/function_string_format.h"
40
#include "exprs/function/function_string_replace.h"
41
#include "exprs/function/function_string_to_string.h"
42
#include "exprs/function/function_totype.h"
43
#include "exprs/function/simple_function_factory.h"
44
#include "exprs/function/string_hex_util.h"
45
#include "exprs/segment_filter_helpers.h"
46
#include "util/string_search.hpp"
47
#include "util/url_coding.h"
48
#include "util/utf8_check.h"
49
50
namespace doris {
51
/// Tag type exposing the function name "ascii".
struct NameStringASCII {
    static constexpr const char* name = "ascii";
};
54
55
struct StringASCII {
56
    using ReturnType = DataTypeInt32;
57
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
58
    using Type = String;
59
    using ReturnColumnType = ColumnInt32;
60
61
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
62
54
                         PaddedPODArray<Int32>& res) {
63
54
        auto size = offsets.size();
64
54
        res.resize(size);
65
152
        for (int i = 0; i < size; ++i) {
66
98
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
67
98
            res[i] = (offsets[i] == offsets[i - 1]) ? 0 : static_cast<uint8_t>(raw_str[0]);
68
98
        }
69
54
        return Status::OK();
70
54
    }
71
};
72
73
/// Tag type exposing the function name "parse_data_size".
struct NameParseDataSize {
    static constexpr const char* name = "parse_data_size";
};
76
77
static const std::map<std::string_view, Int128> UNITS = {
78
        {"B", static_cast<Int128>(1)},        {"kB", static_cast<Int128>(1) << 10},
79
        {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30},
80
        {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50},
81
        {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70},
82
        {"YB", static_cast<Int128>(1) << 80}};
83
84
struct ParseDataSize {
85
    using ReturnType = DataTypeInt128;
86
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
87
    using Type = String;
88
    using ReturnColumnType = ColumnInt128;
89
90
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
91
48
                         PaddedPODArray<Int128>& res) {
92
48
        auto size = offsets.size();
93
48
        res.resize(size);
94
100
        for (int i = 0; i < size; ++i) {
95
52
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
96
52
            int str_size = offsets[i] - offsets[i - 1];
97
52
            res[i] = parse_data_size(std::string_view(raw_str, str_size));
98
52
        }
99
48
        return Status::OK();
100
48
    }
101
102
52
    static Int128 parse_data_size(const std::string_view& dataSize) {
103
52
        int digit_length = 0;
104
216
        for (char c : dataSize) {
105
216
            if (isdigit(c) || c == '.') {
106
166
                digit_length++;
107
166
            } else {
108
50
                break;
109
50
            }
110
216
        }
111
112
52
        if (digit_length == 0) {
113
4
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
114
4
                                   "Invalid Input argument \"{}\" of function parse_data_size",
115
4
                                   dataSize);
116
4
        }
117
        // 123.45MB--->123.45 : MB
118
48
        double value = 0.0;
119
48
        try {
120
48
            value = std::stod(std::string(dataSize.substr(0, digit_length)));
121
48
        } catch (const std::exception& e) {
122
0
            throw doris::Exception(
123
0
                    ErrorCode::INVALID_ARGUMENT,
124
0
                    "Invalid Input argument \"{}\" of function parse_data_size, error: {}",
125
0
                    dataSize, e.what());
126
0
        }
127
48
        auto unit = dataSize.substr(digit_length);
128
48
        auto it = UNITS.find(unit);
129
48
        if (it != UNITS.end()) {
130
45
            return static_cast<__int128>(static_cast<long double>(it->second) * value);
131
45
        } else {
132
3
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
133
3
                                   "Invalid Input argument \"{}\" of function parse_data_size",
134
3
                                   dataSize);
135
3
        }
136
48
    }
137
};
138
139
/// Tag type exposing the function name "quote".
struct NameQuote {
    static constexpr const char* name = "quote";
};
142
143
struct NameQuoteImpl {
144
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
145
17
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
146
17
        size_t offset_size = offsets.size();
147
17
        ColumnString::Offset pos = 0;
148
17
        res_offsets.resize(offset_size);
149
17
        res_data.resize(data.size() + offset_size * 2);
150
45
        for (int i = 0; i < offset_size; i++) {
151
28
            const unsigned char* raw_str = &data[offsets[i - 1]];
152
28
            ColumnString::Offset size = offsets[i] - offsets[i - 1];
153
28
            res_data[pos] = '\'';
154
28
            std::memcpy(res_data.data() + pos + 1, raw_str, size);
155
28
            res_data[pos + size + 1] = '\'';
156
28
            pos += size + 2;
157
28
            res_offsets[i] = pos;
158
28
        }
159
17
        return Status::OK();
160
17
    }
161
};
162
163
/// Tag type exposing the function name "length".
struct NameStringLength {
    static constexpr const char* name = "length";
};
166
167
struct StringLengthImpl {
168
    using ReturnType = DataTypeInt32;
169
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
170
    using Type = String;
171
    using ReturnColumnType = ColumnInt32;
172
173
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
174
10.8k
                         PaddedPODArray<Int32>& res) {
175
10.8k
        auto size = offsets.size();
176
10.8k
        res.resize(size);
177
8.26M
        for (int i = 0; i < size; ++i) {
178
8.25M
            int str_size = offsets[i] - offsets[i - 1];
179
8.25M
            res[i] = str_size;
180
8.25M
        }
181
10.8k
        return Status::OK();
182
10.8k
    }
183
};
184
185
/// Tag type exposing the function name "crc32".
struct NameCrc32 {
    static constexpr const char* name = "crc32";
};
188
189
struct Crc32Impl {
190
    using ReturnType = DataTypeInt64;
191
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
192
    using Type = String;
193
    using ReturnColumnType = ColumnInt64;
194
195
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
196
3
                         PaddedPODArray<Int64>& res) {
197
3
        auto size = offsets.size();
198
3
        res.resize(size);
199
6
        for (int i = 0; i < size; ++i) {
200
3
            res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1],
201
3
                             offsets[i] - offsets[i - 1]);
202
3
        }
203
3
        return Status::OK();
204
3
    }
205
};
206
207
/// Tag type exposing the function name "char_length".
struct NameStringUtf8Length {
    static constexpr const char* name = "char_length";
};
210
211
struct StringUtf8LengthImpl {
212
    using ReturnType = DataTypeInt32;
213
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
214
    using Type = String;
215
    using ReturnColumnType = ColumnInt32;
216
217
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
218
50
                         PaddedPODArray<Int32>& res) {
219
50
        auto size = offsets.size();
220
50
        res.resize(size);
221
144
        for (int i = 0; i < size; ++i) {
222
94
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
223
94
            int str_size = offsets[i] - offsets[i - 1];
224
94
            res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size);
225
94
        }
226
50
        return Status::OK();
227
50
    }
228
};
229
230
/// Tag type exposing the function name "is_valid_utf8".
struct NameIsValidUTF8 {
    static constexpr const char* name = "is_valid_utf8";
};
233
234
struct IsValidUTF8Impl {
235
    using ReturnType = DataTypeUInt8;
236
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
237
    using Type = String;
238
    using ReturnColumnType = ColumnUInt8;
239
240
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
241
39
                         PaddedPODArray<UInt8>& res) {
242
39
        auto size = offsets.size();
243
39
        res.resize(size);
244
98
        for (size_t i = 0; i < size; ++i) {
245
59
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
246
59
            size_t str_size = offsets[i] - offsets[i - 1];
247
59
            res[i] = validate_utf8(raw_str, str_size) ? 1 : 0;
248
59
        }
249
39
        return Status::OK();
250
39
    }
251
};
252
253
/// Tag type exposing the function name "starts_with".
struct NameStartsWith {
    static constexpr const char* name = "starts_with";
};
256
257
struct StartsWithOp {
258
    using ResultDataType = DataTypeUInt8;
259
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
260
261
4.23k
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
262
4.23k
        res = strl.starts_with(strr);
263
4.23k
    }
264
};
265
266
/// Tag type exposing the function name "ends_with".
struct NameEndsWith {
    static constexpr const char* name = "ends_with";
};
269
270
struct EndsWithOp {
271
    using ResultDataType = DataTypeUInt8;
272
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
273
274
142
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
275
142
        res = strl.ends_with(strr);
276
142
    }
277
};
278
279
/// Tag type exposing the function name "find_in_set".
struct NameFindInSet {
    static constexpr const char* name = "find_in_set";
};
282
283
struct FindInSetOp {
284
    using ResultDataType = DataTypeInt32;
285
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
286
170
    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
287
670
        for (const auto& c : strl) {
288
670
            if (c == ',') {
289
21
                res = 0;
290
21
                return;
291
21
            }
292
670
        }
293
294
149
        int32_t token_index = 1;
295
149
        int32_t start = 0;
296
149
        int32_t end;
297
298
253
        do {
299
253
            end = start;
300
            // Position end.
301
1.05k
            while (end < strr.length() && strr[end] != ',') {
302
806
                ++end;
303
806
            }
304
305
253
            if (strl == std::string_view {strr.data() + start, (size_t)end - start}) {
306
93
                res = token_index;
307
93
                return;
308
93
            }
309
310
            // Re-position start and end past ','
311
160
            start = end + 1;
312
160
            ++token_index;
313
160
        } while (start < strr.length());
314
56
        res = 0;
315
56
    }
316
};
317
318
/// Tag type exposing the function name "instr".
struct NameInstr {
    static constexpr const char* name = "instr";
};
321
322
// LeftDataType and RightDataType are DataTypeString
323
template <typename LeftDataType, typename RightDataType>
324
struct StringInStrImpl {
325
    using ResultDataType = DataTypeInt32;
326
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
327
328
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
329
72
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
330
72
        StringRef lstr_ref(ldata.data, ldata.size);
331
332
72
        auto size = roffsets.size();
333
72
        res.resize(size);
334
144
        for (int i = 0; i < size; ++i) {
335
72
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
336
72
            int r_str_size = roffsets[i] - roffsets[i - 1];
337
338
72
            StringRef rstr_ref(r_raw_str, r_str_size);
339
340
72
            res[i] = execute(lstr_ref, rstr_ref);
341
72
        }
342
343
72
        return Status::OK();
344
72
    }
345
346
    static Status vector_scalar(const ColumnString::Chars& ldata,
347
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
348
86
                                ResultPaddedPODArray& res) {
349
86
        auto size = loffsets.size();
350
86
        res.resize(size);
351
352
86
        if (rdata.size == 0) {
353
12
            std::fill(res.begin(), res.end(), 1);
354
12
            return Status::OK();
355
12
        }
356
357
74
        const UInt8* begin = ldata.data();
358
74
        const UInt8* end = begin + ldata.size();
359
74
        const UInt8* pos = begin;
360
361
        /// Current index in the array of strings.
362
74
        size_t i = 0;
363
74
        std::fill(res.begin(), res.end(), 0);
364
365
74
        StringRef rstr_ref(rdata.data, rdata.size);
366
74
        StringSearch search(&rstr_ref);
367
368
90
        while (pos < end) {
369
            // search return matched substring start offset
370
64
            pos = (UInt8*)search.search((char*)pos, end - pos);
371
64
            if (pos >= end) {
372
48
                break;
373
48
            }
374
375
            /// Determine which index it refers to.
376
            /// begin + value_offsets[i] is the start offset of string at i+1
377
16
            while (begin + loffsets[i] < pos) {
378
0
                ++i;
379
0
            }
380
381
            /// We check that the entry does not pass through the boundaries of strings.
382
16
            if (pos + rdata.size <= begin + loffsets[i]) {
383
16
                int loc = (int)(pos - begin) - loffsets[i - 1];
384
16
                int l_str_size = loffsets[i] - loffsets[i - 1];
385
16
                auto len = std::min(l_str_size, loc);
386
16
                loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len);
387
16
                res[i] = loc + 1;
388
16
            }
389
390
            // move to next string offset
391
16
            pos = begin + loffsets[i];
392
16
            ++i;
393
16
        }
394
395
74
        return Status::OK();
396
86
    }
397
398
    static Status vector_vector(const ColumnString::Chars& ldata,
399
                                const ColumnString::Offsets& loffsets,
400
                                const ColumnString::Chars& rdata,
401
207
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
402
207
        DCHECK_EQ(loffsets.size(), roffsets.size());
403
404
207
        auto size = loffsets.size();
405
207
        res.resize(size);
406
661
        for (int i = 0; i < size; ++i) {
407
454
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
408
454
            int l_str_size = loffsets[i] - loffsets[i - 1];
409
454
            StringRef lstr_ref(l_raw_str, l_str_size);
410
411
454
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
412
454
            int r_str_size = roffsets[i] - roffsets[i - 1];
413
454
            StringRef rstr_ref(r_raw_str, r_str_size);
414
415
454
            res[i] = execute(lstr_ref, rstr_ref);
416
454
        }
417
418
207
        return Status::OK();
419
207
    }
420
421
526
    static int execute(const StringRef& strl, const StringRef& strr) {
422
526
        if (strr.size == 0) {
423
71
            return 1;
424
71
        }
425
426
455
        StringSearch search(&strr);
427
        // Hive returns positions starting from 1.
428
455
        int loc = search.search(&strl);
429
455
        if (loc > 0) {
430
43
            int len = std::min(loc, (int)strl.size);
431
43
            loc = simd::VStringFunctions::get_char_len(strl.data, len);
432
43
        }
433
434
455
        return loc + 1;
435
526
    }
436
};
437
438
// the same impl as instr
439
/// Tag type exposing the function name "locate".
struct NameLocate {
    static constexpr const char* name = "locate";
};
442
443
// LeftDataType and RightDataType are DataTypeString
444
template <typename LeftDataType, typename RightDataType>
445
struct StringLocateImpl {
446
    using ResultDataType = DataTypeInt32;
447
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
448
449
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
450
38
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
451
38
        return StringInStrImpl<LeftDataType, RightDataType>::vector_scalar(rdata, roffsets, ldata,
452
38
                                                                           res);
453
38
    }
454
455
    static Status vector_scalar(const ColumnString::Chars& ldata,
456
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
457
36
                                ResultPaddedPODArray& res) {
458
36
        return StringInStrImpl<LeftDataType, RightDataType>::scalar_vector(rdata, ldata, loffsets,
459
36
                                                                           res);
460
36
    }
461
462
    static Status vector_vector(const ColumnString::Chars& ldata,
463
                                const ColumnString::Offsets& loffsets,
464
                                const ColumnString::Chars& rdata,
465
126
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
466
126
        return StringInStrImpl<LeftDataType, RightDataType>::vector_vector(rdata, roffsets, ldata,
467
126
                                                                           loffsets, res);
468
126
    }
469
};
470
471
// LeftDataType and RightDataType are DataTypeString
472
template <typename LeftDataType, typename RightDataType, typename OP>
473
struct StringFunctionImpl {
474
    using ResultDataType = typename OP::ResultDataType;
475
    using ResultPaddedPODArray = typename OP::ResultPaddedPODArray;
476
477
    static Status vector_vector(const ColumnString::Chars& ldata,
478
                                const ColumnString::Offsets& loffsets,
479
                                const ColumnString::Chars& rdata,
480
213
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
481
213
        DCHECK_EQ(loffsets.size(), roffsets.size());
482
483
213
        auto size = loffsets.size();
484
213
        res.resize(size);
485
576
        for (int i = 0; i < size; ++i) {
486
363
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
487
363
            int l_str_size = loffsets[i] - loffsets[i - 1];
488
489
363
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
490
363
            int r_str_size = roffsets[i] - roffsets[i - 1];
491
492
363
            std::string_view lview(l_raw_str, l_str_size);
493
363
            std::string_view rview(r_raw_str, r_str_size);
494
495
363
            OP::execute(lview, rview, res[i]);
496
363
        }
497
213
        return Status::OK();
498
213
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
480
88
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
481
88
        DCHECK_EQ(loffsets.size(), roffsets.size());
482
483
88
        auto size = loffsets.size();
484
88
        res.resize(size);
485
215
        for (int i = 0; i < size; ++i) {
486
127
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
487
127
            int l_str_size = loffsets[i] - loffsets[i - 1];
488
489
127
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
490
127
            int r_str_size = roffsets[i] - roffsets[i - 1];
491
492
127
            std::string_view lview(l_raw_str, l_str_size);
493
127
            std::string_view rview(r_raw_str, r_str_size);
494
495
127
            OP::execute(lview, rview, res[i]);
496
127
        }
497
88
        return Status::OK();
498
88
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
480
61
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
481
61
        DCHECK_EQ(loffsets.size(), roffsets.size());
482
483
61
        auto size = loffsets.size();
484
61
        res.resize(size);
485
175
        for (int i = 0; i < size; ++i) {
486
114
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
487
114
            int l_str_size = loffsets[i] - loffsets[i - 1];
488
489
114
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
490
114
            int r_str_size = roffsets[i] - roffsets[i - 1];
491
492
114
            std::string_view lview(l_raw_str, l_str_size);
493
114
            std::string_view rview(r_raw_str, r_str_size);
494
495
114
            OP::execute(lview, rview, res[i]);
496
114
        }
497
61
        return Status::OK();
498
61
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
480
64
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
481
64
        DCHECK_EQ(loffsets.size(), roffsets.size());
482
483
64
        auto size = loffsets.size();
484
64
        res.resize(size);
485
186
        for (int i = 0; i < size; ++i) {
486
122
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
487
122
            int l_str_size = loffsets[i] - loffsets[i - 1];
488
489
122
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
490
122
            int r_str_size = roffsets[i] - roffsets[i - 1];
491
492
122
            std::string_view lview(l_raw_str, l_str_size);
493
122
            std::string_view rview(r_raw_str, r_str_size);
494
495
122
            OP::execute(lview, rview, res[i]);
496
122
        }
497
64
        return Status::OK();
498
64
    }
499
    static Status vector_scalar(const ColumnString::Chars& ldata,
500
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
501
35
                                ResultPaddedPODArray& res) {
502
35
        auto size = loffsets.size();
503
35
        res.resize(size);
504
35
        std::string_view rview(rdata.data, rdata.size);
505
4.16k
        for (int i = 0; i < size; ++i) {
506
4.13k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
507
4.13k
            int l_str_size = loffsets[i] - loffsets[i - 1];
508
4.13k
            std::string_view lview(l_raw_str, l_str_size);
509
510
4.13k
            OP::execute(lview, rview, res[i]);
511
4.13k
        }
512
35
        return Status::OK();
513
35
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
501
5
                                ResultPaddedPODArray& res) {
502
5
        auto size = loffsets.size();
503
5
        res.resize(size);
504
5
        std::string_view rview(rdata.data, rdata.size);
505
4.10k
        for (int i = 0; i < size; ++i) {
506
4.10k
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
507
4.10k
            int l_str_size = loffsets[i] - loffsets[i - 1];
508
4.10k
            std::string_view lview(l_raw_str, l_str_size);
509
510
4.10k
            OP::execute(lview, rview, res[i]);
511
4.10k
        }
512
5
        return Status::OK();
513
5
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
501
14
                                ResultPaddedPODArray& res) {
502
14
        auto size = loffsets.size();
503
14
        res.resize(size);
504
14
        std::string_view rview(rdata.data, rdata.size);
505
28
        for (int i = 0; i < size; ++i) {
506
14
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
507
14
            int l_str_size = loffsets[i] - loffsets[i - 1];
508
14
            std::string_view lview(l_raw_str, l_str_size);
509
510
14
            OP::execute(lview, rview, res[i]);
511
14
        }
512
14
        return Status::OK();
513
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
501
16
                                ResultPaddedPODArray& res) {
502
16
        auto size = loffsets.size();
503
16
        res.resize(size);
504
16
        std::string_view rview(rdata.data, rdata.size);
505
32
        for (int i = 0; i < size; ++i) {
506
16
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
507
16
            int l_str_size = loffsets[i] - loffsets[i - 1];
508
16
            std::string_view lview(l_raw_str, l_str_size);
509
510
16
            OP::execute(lview, rview, res[i]);
511
16
        }
512
16
        return Status::OK();
513
16
    }
514
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
515
44
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
516
44
        auto size = roffsets.size();
517
44
        res.resize(size);
518
44
        std::string_view lview(ldata.data, ldata.size);
519
94
        for (int i = 0; i < size; ++i) {
520
50
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
521
50
            int r_str_size = roffsets[i] - roffsets[i - 1];
522
50
            std::string_view rview(r_raw_str, r_str_size);
523
524
50
            OP::execute(lview, rview, res[i]);
525
50
        }
526
44
        return Status::OK();
527
44
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
515
4
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
516
4
        auto size = roffsets.size();
517
4
        res.resize(size);
518
4
        std::string_view lview(ldata.data, ldata.size);
519
8
        for (int i = 0; i < size; ++i) {
520
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
521
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
522
4
            std::string_view rview(r_raw_str, r_str_size);
523
524
4
            OP::execute(lview, rview, res[i]);
525
4
        }
526
4
        return Status::OK();
527
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
515
14
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
516
14
        auto size = roffsets.size();
517
14
        res.resize(size);
518
14
        std::string_view lview(ldata.data, ldata.size);
519
28
        for (int i = 0; i < size; ++i) {
520
14
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
521
14
            int r_str_size = roffsets[i] - roffsets[i - 1];
522
14
            std::string_view rview(r_raw_str, r_str_size);
523
524
14
            OP::execute(lview, rview, res[i]);
525
14
        }
526
14
        return Status::OK();
527
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERNS7_IiLm4096ESA_Lm16ELm15EEE
Line
Count
Source
515
26
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
516
26
        auto size = roffsets.size();
517
26
        res.resize(size);
518
26
        std::string_view lview(ldata.data, ldata.size);
519
58
        for (int i = 0; i < size; ++i) {
520
32
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
521
32
            int r_str_size = roffsets[i] - roffsets[i - 1];
522
32
            std::string_view rview(r_raw_str, r_str_size);
523
524
32
            OP::execute(lview, rview, res[i]);
525
32
        }
526
26
        return Status::OK();
527
26
    }
528
};
529
530
/// Tag type exposing the function name "lower".
struct NameToLower {
    static constexpr const char* name = "lower";
};
533
534
/// Tag type exposing the function name "upper".
struct NameToUpper {
    static constexpr const char* name = "upper";
};
537
538
template <typename OpName>
539
struct TransferImpl {
540
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
541
319
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
542
319
        size_t offset_size = offsets.size();
543
319
        if (UNLIKELY(!offset_size)) {
544
0
            return Status::OK();
545
0
        }
546
547
319
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
548
319
        res_offsets.resize(offset_size);
549
319
        if (is_ascii) {
550
260
            memcpy_small_allow_read_write_overflow15(
551
260
                    res_offsets.data(), offsets.data(),
552
260
                    offset_size * sizeof(ColumnString::Offsets::value_type));
553
554
260
            size_t data_length = data.size();
555
260
            res_data.resize(data_length);
556
260
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
557
89
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
558
171
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
559
171
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
560
171
            }
561
260
        } else {
562
59
            execute_utf8(data, offsets, res_data, res_offsets);
563
59
        }
564
565
319
        return Status::OK();
566
319
    }
_ZN5doris12TransferImplINS_11NameToLowerEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
541
192
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
542
192
        size_t offset_size = offsets.size();
543
192
        if (UNLIKELY(!offset_size)) {
544
0
            return Status::OK();
545
0
        }
546
547
192
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
548
192
        res_offsets.resize(offset_size);
549
192
        if (is_ascii) {
550
171
            memcpy_small_allow_read_write_overflow15(
551
171
                    res_offsets.data(), offsets.data(),
552
171
                    offset_size * sizeof(ColumnString::Offsets::value_type));
553
554
171
            size_t data_length = data.size();
555
171
            res_data.resize(data_length);
556
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
557
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
558
171
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
559
171
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
560
171
            }
561
171
        } else {
562
21
            execute_utf8(data, offsets, res_data, res_offsets);
563
21
        }
564
565
192
        return Status::OK();
566
192
    }
_ZN5doris12TransferImplINS_11NameToUpperEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
541
127
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
542
127
        size_t offset_size = offsets.size();
543
127
        if (UNLIKELY(!offset_size)) {
544
0
            return Status::OK();
545
0
        }
546
547
127
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
548
127
        res_offsets.resize(offset_size);
549
127
        if (is_ascii) {
550
89
            memcpy_small_allow_read_write_overflow15(
551
89
                    res_offsets.data(), offsets.data(),
552
89
                    offset_size * sizeof(ColumnString::Offsets::value_type));
553
554
89
            size_t data_length = data.size();
555
89
            res_data.resize(data_length);
556
89
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
557
89
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
558
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
559
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
560
            }
561
89
        } else {
562
38
            execute_utf8(data, offsets, res_data, res_offsets);
563
38
        }
564
565
127
        return Status::OK();
566
127
    }
567
568
    static void execute_utf8(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
569
60
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
570
60
        std::string result;
571
198
        for (int64_t i = 0; i < offsets.size(); ++i) {
572
138
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
573
138
            uint32_t size = offsets[i] - offsets[i - 1];
574
575
138
            result.clear();
576
138
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
577
91
                to_upper_utf8(begin, size, result);
578
91
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
579
47
                to_lower_utf8(begin, size, result);
580
47
            }
581
138
            StringOP::push_value_string(result, i, res_data, res_offsets);
582
138
        }
583
60
    }
_ZN5doris12TransferImplINS_11NameToLowerEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
569
21
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
570
21
        std::string result;
571
68
        for (int64_t i = 0; i < offsets.size(); ++i) {
572
47
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
573
47
            uint32_t size = offsets[i] - offsets[i - 1];
574
575
47
            result.clear();
576
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
577
                to_upper_utf8(begin, size, result);
578
47
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
579
47
                to_lower_utf8(begin, size, result);
580
47
            }
581
47
            StringOP::push_value_string(result, i, res_data, res_offsets);
582
47
        }
583
21
    }
_ZN5doris12TransferImplINS_11NameToUpperEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
569
39
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
570
39
        std::string result;
571
130
        for (int64_t i = 0; i < offsets.size(); ++i) {
572
91
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
573
91
            uint32_t size = offsets[i] - offsets[i - 1];
574
575
91
            result.clear();
576
91
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
577
91
                to_upper_utf8(begin, size, result);
578
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
579
                to_lower_utf8(begin, size, result);
580
            }
581
91
            StringOP::push_value_string(result, i, res_data, res_offsets);
582
91
        }
583
39
    }
584
585
91
    static void to_upper_utf8(const char* data, uint32_t size, std::string& result) {
586
91
        icu::StringPiece sp;
587
91
        sp.set(data, size);
588
91
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
589
91
        unicode_str.toUpper();
590
91
        unicode_str.toUTF8String(result);
591
91
    }
592
593
47
    static void to_lower_utf8(const char* data, uint32_t size, std::string& result) {
594
47
        icu::StringPiece sp;
595
47
        sp.set(data, size);
596
47
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
597
47
        unicode_str.toLower();
598
47
        unicode_str.toUTF8String(result);
599
47
    }
600
};
601
602
// Capitalize first letter
603
struct NameToInitcap {
604
    static constexpr auto name = "initcap";
605
};
606
607
struct InitcapImpl {
608
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
609
172
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
610
172
        res_offsets.resize(offsets.size());
611
612
172
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
613
172
        if (is_ascii) {
614
114
            impl_vectors_ascii(data, offsets, res_data, res_offsets);
615
114
        } else {
616
58
            impl_vectors_utf8(data, offsets, res_data, res_offsets);
617
58
        }
618
172
        return Status::OK();
619
172
    }
620
621
    static void impl_vectors_ascii(const ColumnString::Chars& data,
622
                                   const ColumnString::Offsets& offsets,
623
                                   ColumnString::Chars& res_data,
624
114
                                   ColumnString::Offsets& res_offsets) {
625
114
        size_t offset_size = offsets.size();
626
114
        memcpy_small_allow_read_write_overflow15(
627
114
                res_offsets.data(), offsets.data(),
628
114
                offset_size * sizeof(ColumnString::Offsets::value_type));
629
630
114
        size_t data_length = data.size();
631
114
        res_data.resize(data_length);
632
114
        simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
633
634
114
        bool need_capitalize = true;
635
246
        for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) {
636
132
            auto end_index = res_offsets[offset_index];
637
132
            need_capitalize = true;
638
639
1.56k
            for (size_t i = start_index; i < end_index; ++i) {
640
1.43k
                if (!::isalnum(res_data[i])) {
641
216
                    need_capitalize = true;
642
1.21k
                } else if (need_capitalize) {
643
                    /*
644
                    https://en.cppreference.com/w/cpp/string/byte/toupper
645
                    Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. 
646
                    To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char:
647
                    char my_toupper(char ch)
648
                    {
649
                        return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
650
                    }
651
                    */
652
267
                    res_data[i] = static_cast<unsigned char>(::toupper(res_data[i]));
653
267
                    need_capitalize = false;
654
267
                }
655
1.43k
            }
656
657
132
            start_index = end_index;
658
132
        }
659
114
    }
660
661
    static void impl_vectors_utf8(const ColumnString::Chars& data,
662
                                  const ColumnString::Offsets& offsets,
663
                                  ColumnString::Chars& res_data,
664
58
                                  ColumnString::Offsets& res_offsets) {
665
58
        std::string result;
666
123
        for (int64_t i = 0; i < offsets.size(); ++i) {
667
65
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
668
65
            uint32_t size = offsets[i] - offsets[i - 1];
669
65
            result.clear();
670
65
            to_initcap_utf8(begin, size, result);
671
65
            StringOP::push_value_string(result, i, res_data, res_offsets);
672
65
        }
673
58
    }
674
675
65
    static void to_initcap_utf8(const char* data, uint32_t size, std::string& result) {
676
65
        icu::StringPiece sp;
677
65
        sp.set(data, size);
678
65
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
679
65
        unicode_str.toLower();
680
65
        icu::UnicodeString output_str;
681
65
        bool need_capitalize = true;
682
65
        icu::StringCharacterIterator iter(unicode_str);
683
647
        for (UChar32 ch = iter.first32(); ch != icu::CharacterIterator::DONE; ch = iter.next32()) {
684
582
            if (!u_isalnum(ch)) {
685
105
                need_capitalize = true;
686
477
            } else if (need_capitalize) {
687
87
                ch = u_toupper(ch);
688
87
                need_capitalize = false;
689
87
            }
690
582
            output_str.append(ch);
691
582
        }
692
65
        output_str.toUTF8String(result);
693
65
    }
694
};
695
696
struct NameTrim {
697
    static constexpr auto name = "trim";
698
};
699
struct NameLTrim {
700
    static constexpr auto name = "ltrim";
701
};
702
struct NameRTrim {
703
    static constexpr auto name = "rtrim";
704
};
705
struct NameTrimIn {
706
    static constexpr auto name = "trim_in";
707
};
708
struct NameLTrimIn {
709
    static constexpr auto name = "ltrim_in";
710
};
711
struct NameRTrimIn {
712
    static constexpr auto name = "rtrim_in";
713
};
714
template <bool is_ltrim, bool is_rtrim, bool trim_single>
715
struct TrimUtil {
716
    static Status vector(const ColumnString::Chars& str_data,
717
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
718
299
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
719
299
        const size_t offset_size = str_offsets.size();
720
299
        res_offsets.resize(offset_size);
721
299
        res_data.reserve(str_data.size());
722
851
        for (size_t i = 0; i < offset_size; ++i) {
723
552
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
724
552
            const auto* str_end = str_data.data() + str_offsets[i];
725
726
552
            if constexpr (is_ltrim) {
727
335
                str_begin =
728
335
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
729
335
            }
730
552
            if constexpr (is_rtrim) {
731
395
                str_end =
732
395
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
733
395
            }
734
735
552
            res_data.insert_assume_reserved(str_begin, str_end);
736
            // The length of the result of the trim function will never exceed the length of the input.
737
552
            res_offsets[i] = (ColumnString::Offset)res_data.size();
738
552
        }
739
299
        return Status::OK();
740
299
    }
_ZN5doris8TrimUtilILb1ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
718
58
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
719
58
        const size_t offset_size = str_offsets.size();
720
58
        res_offsets.resize(offset_size);
721
58
        res_data.reserve(str_data.size());
722
178
        for (size_t i = 0; i < offset_size; ++i) {
723
120
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
724
120
            const auto* str_end = str_data.data() + str_offsets[i];
725
726
120
            if constexpr (is_ltrim) {
727
120
                str_begin =
728
120
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
729
120
            }
730
120
            if constexpr (is_rtrim) {
731
120
                str_end =
732
120
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
733
120
            }
734
735
120
            res_data.insert_assume_reserved(str_begin, str_end);
736
            // The length of the result of the trim function will never exceed the length of the input.
737
120
            res_offsets[i] = (ColumnString::Offset)res_data.size();
738
120
        }
739
58
        return Status::OK();
740
58
    }
_ZN5doris8TrimUtilILb1ELb0ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
718
52
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
719
52
        const size_t offset_size = str_offsets.size();
720
52
        res_offsets.resize(offset_size);
721
52
        res_data.reserve(str_data.size());
722
148
        for (size_t i = 0; i < offset_size; ++i) {
723
96
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
724
96
            const auto* str_end = str_data.data() + str_offsets[i];
725
726
96
            if constexpr (is_ltrim) {
727
96
                str_begin =
728
96
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
729
96
            }
730
            if constexpr (is_rtrim) {
731
                str_end =
732
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
733
            }
734
735
96
            res_data.insert_assume_reserved(str_begin, str_end);
736
            // The length of the result of the trim function will never exceed the length of the input.
737
96
            res_offsets[i] = (ColumnString::Offset)res_data.size();
738
96
        }
739
52
        return Status::OK();
740
52
    }
_ZN5doris8TrimUtilILb0ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
718
94
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
719
94
        const size_t offset_size = str_offsets.size();
720
94
        res_offsets.resize(offset_size);
721
94
        res_data.reserve(str_data.size());
722
266
        for (size_t i = 0; i < offset_size; ++i) {
723
172
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
724
172
            const auto* str_end = str_data.data() + str_offsets[i];
725
726
            if constexpr (is_ltrim) {
727
                str_begin =
728
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
729
            }
730
172
            if constexpr (is_rtrim) {
731
172
                str_end =
732
172
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
733
172
            }
734
735
172
            res_data.insert_assume_reserved(str_begin, str_end);
736
            // The length of the result of the trim function will never exceed the length of the input.
737
172
            res_offsets[i] = (ColumnString::Offset)res_data.size();
738
172
        }
739
94
        return Status::OK();
740
94
    }
_ZN5doris8TrimUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
718
23
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
719
23
        const size_t offset_size = str_offsets.size();
720
23
        res_offsets.resize(offset_size);
721
23
        res_data.reserve(str_data.size());
722
81
        for (size_t i = 0; i < offset_size; ++i) {
723
58
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
724
58
            const auto* str_end = str_data.data() + str_offsets[i];
725
726
58
            if constexpr (is_ltrim) {
727
58
                str_begin =
728
58
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
729
58
            }
730
58
            if constexpr (is_rtrim) {
731
58
                str_end =
732
58
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
733
58
            }
734
735
58
            res_data.insert_assume_reserved(str_begin, str_end);
736
            // The length of the result of the trim function will never exceed the length of the input.
737
58
            res_offsets[i] = (ColumnString::Offset)res_data.size();
738
58
        }
739
23
        return Status::OK();
740
23
    }
_ZN5doris8TrimUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
718
27
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
719
27
        const size_t offset_size = str_offsets.size();
720
27
        res_offsets.resize(offset_size);
721
27
        res_data.reserve(str_data.size());
722
88
        for (size_t i = 0; i < offset_size; ++i) {
723
61
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
724
61
            const auto* str_end = str_data.data() + str_offsets[i];
725
726
61
            if constexpr (is_ltrim) {
727
61
                str_begin =
728
61
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
729
61
            }
730
            if constexpr (is_rtrim) {
731
                str_end =
732
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
733
            }
734
735
61
            res_data.insert_assume_reserved(str_begin, str_end);
736
            // The length of the result of the trim function will never exceed the length of the input.
737
61
            res_offsets[i] = (ColumnString::Offset)res_data.size();
738
61
        }
739
27
        return Status::OK();
740
27
    }
_ZN5doris8TrimUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
718
45
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
719
45
        const size_t offset_size = str_offsets.size();
720
45
        res_offsets.resize(offset_size);
721
45
        res_data.reserve(str_data.size());
722
90
        for (size_t i = 0; i < offset_size; ++i) {
723
45
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
724
45
            const auto* str_end = str_data.data() + str_offsets[i];
725
726
            if constexpr (is_ltrim) {
727
                str_begin =
728
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
729
            }
730
45
            if constexpr (is_rtrim) {
731
45
                str_end =
732
45
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
733
45
            }
734
735
45
            res_data.insert_assume_reserved(str_begin, str_end);
736
            // The length of the result of the trim function will never exceed the length of the input.
737
45
            res_offsets[i] = (ColumnString::Offset)res_data.size();
738
45
        }
739
45
        return Status::OK();
740
45
    }
741
};
742
template <bool is_ltrim, bool is_rtrim, bool trim_single>
743
struct TrimInUtil {
744
    static Status vector(const ColumnString::Chars& str_data,
745
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
746
121
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
747
121
        const size_t offset_size = str_offsets.size();
748
121
        res_offsets.resize(offset_size);
749
121
        res_data.reserve(str_data.size());
750
121
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
751
121
                         simd::VStringFunctions::is_ascii(StringRef(
752
76
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
753
754
121
        if (all_ascii) {
755
68
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
756
68
        } else {
757
53
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
758
53
        }
759
121
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
746
43
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
747
43
        const size_t offset_size = str_offsets.size();
748
43
        res_offsets.resize(offset_size);
749
43
        res_data.reserve(str_data.size());
750
43
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
751
43
                         simd::VStringFunctions::is_ascii(StringRef(
752
28
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
753
754
43
        if (all_ascii) {
755
24
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
756
24
        } else {
757
19
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
758
19
        }
759
43
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
746
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
747
36
        const size_t offset_size = str_offsets.size();
748
36
        res_offsets.resize(offset_size);
749
36
        res_data.reserve(str_data.size());
750
36
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
751
36
                         simd::VStringFunctions::is_ascii(StringRef(
752
21
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
753
754
36
        if (all_ascii) {
755
19
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
756
19
        } else {
757
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
758
17
        }
759
36
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
746
42
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
747
42
        const size_t offset_size = str_offsets.size();
748
42
        res_offsets.resize(offset_size);
749
42
        res_data.reserve(str_data.size());
750
42
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
751
42
                         simd::VStringFunctions::is_ascii(StringRef(
752
27
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
753
754
42
        if (all_ascii) {
755
25
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
756
25
        } else {
757
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
758
17
        }
759
42
    }
760
761
private:
762
    static Status impl_vectors_ascii(const ColumnString::Chars& str_data,
763
                                     const ColumnString::Offsets& str_offsets,
764
                                     const StringRef& remove_str, ColumnString::Chars& res_data,
765
68
                                     ColumnString::Offsets& res_offsets) {
766
68
        const size_t offset_size = str_offsets.size();
767
68
        std::bitset<128> char_lookup;
768
68
        const char* remove_begin = remove_str.data;
769
68
        const char* remove_end = remove_str.data + remove_str.size;
770
771
251
        while (remove_begin < remove_end) {
772
183
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
773
183
            remove_begin += 1;
774
183
        }
775
776
136
        for (size_t i = 0; i < offset_size; ++i) {
777
68
            const char* str_begin =
778
68
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
779
68
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
780
68
            const char* left_trim_pos = str_begin;
781
68
            const char* right_trim_pos = str_end;
782
783
68
            if constexpr (is_ltrim) {
784
127
                while (left_trim_pos < str_end) {
785
114
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
786
30
                        break;
787
30
                    }
788
84
                    ++left_trim_pos;
789
84
                }
790
43
            }
791
792
68
            if constexpr (is_rtrim) {
793
114
                while (right_trim_pos > left_trim_pos) {
794
100
                    --right_trim_pos;
795
100
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
796
35
                        ++right_trim_pos;
797
35
                        break;
798
35
                    }
799
100
                }
800
49
            }
801
802
68
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
803
            // The length of the result of the trim function will never exceed the length of the input.
804
68
            res_offsets[i] = (ColumnString::Offset)res_data.size();
805
68
        }
806
807
68
        return Status::OK();
808
68
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
765
24
                                     ColumnString::Offsets& res_offsets) {
766
24
        const size_t offset_size = str_offsets.size();
767
24
        std::bitset<128> char_lookup;
768
24
        const char* remove_begin = remove_str.data;
769
24
        const char* remove_end = remove_str.data + remove_str.size;
770
771
86
        while (remove_begin < remove_end) {
772
62
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
773
62
            remove_begin += 1;
774
62
        }
775
776
48
        for (size_t i = 0; i < offset_size; ++i) {
777
24
            const char* str_begin =
778
24
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
779
24
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
780
24
            const char* left_trim_pos = str_begin;
781
24
            const char* right_trim_pos = str_end;
782
783
24
            if constexpr (is_ltrim) {
784
57
                while (left_trim_pos < str_end) {
785
50
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
786
17
                        break;
787
17
                    }
788
33
                    ++left_trim_pos;
789
33
                }
790
24
            }
791
792
24
            if constexpr (is_rtrim) {
793
39
                while (right_trim_pos > left_trim_pos) {
794
32
                    --right_trim_pos;
795
32
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
796
17
                        ++right_trim_pos;
797
17
                        break;
798
17
                    }
799
32
                }
800
24
            }
801
802
24
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
803
            // The length of the result of the trim function will never exceed the length of the input.
804
24
            res_offsets[i] = (ColumnString::Offset)res_data.size();
805
24
        }
806
807
24
        return Status::OK();
808
24
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
765
19
                                     ColumnString::Offsets& res_offsets) {
766
19
        const size_t offset_size = str_offsets.size();
767
19
        std::bitset<128> char_lookup;
768
19
        const char* remove_begin = remove_str.data;
769
19
        const char* remove_end = remove_str.data + remove_str.size;
770
771
73
        while (remove_begin < remove_end) {
772
54
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
773
54
            remove_begin += 1;
774
54
        }
775
776
38
        for (size_t i = 0; i < offset_size; ++i) {
777
19
            const char* str_begin =
778
19
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
779
19
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
780
19
            const char* left_trim_pos = str_begin;
781
19
            const char* right_trim_pos = str_end;
782
783
19
            if constexpr (is_ltrim) {
784
70
                while (left_trim_pos < str_end) {
785
64
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
786
13
                        break;
787
13
                    }
788
51
                    ++left_trim_pos;
789
51
                }
790
19
            }
791
792
            if constexpr (is_rtrim) {
793
                while (right_trim_pos > left_trim_pos) {
794
                    --right_trim_pos;
795
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
796
                        ++right_trim_pos;
797
                        break;
798
                    }
799
                }
800
            }
801
802
19
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
803
            // The length of the result of the trim function will never exceed the length of the input.
804
19
            res_offsets[i] = (ColumnString::Offset)res_data.size();
805
19
        }
806
807
19
        return Status::OK();
808
19
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
765
25
                                     ColumnString::Offsets& res_offsets) {
766
25
        const size_t offset_size = str_offsets.size();
767
25
        std::bitset<128> char_lookup;
768
25
        const char* remove_begin = remove_str.data;
769
25
        const char* remove_end = remove_str.data + remove_str.size;
770
771
92
        while (remove_begin < remove_end) {
772
67
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
773
67
            remove_begin += 1;
774
67
        }
775
776
50
        for (size_t i = 0; i < offset_size; ++i) {
777
25
            const char* str_begin =
778
25
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
779
25
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
780
25
            const char* left_trim_pos = str_begin;
781
25
            const char* right_trim_pos = str_end;
782
783
            if constexpr (is_ltrim) {
784
                while (left_trim_pos < str_end) {
785
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
786
                        break;
787
                    }
788
                    ++left_trim_pos;
789
                }
790
            }
791
792
25
            if constexpr (is_rtrim) {
793
75
                while (right_trim_pos > left_trim_pos) {
794
68
                    --right_trim_pos;
795
68
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
796
18
                        ++right_trim_pos;
797
18
                        break;
798
18
                    }
799
68
                }
800
25
            }
801
802
25
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
803
            // The length of the result of the trim function will never exceed the length of the input.
804
25
            res_offsets[i] = (ColumnString::Offset)res_data.size();
805
25
        }
806
807
25
        return Status::OK();
808
25
    }
809
810
    static Status impl_vectors_utf8(const ColumnString::Chars& str_data,
811
                                    const ColumnString::Offsets& str_offsets,
812
                                    const StringRef& remove_str, ColumnString::Chars& res_data,
813
53
                                    ColumnString::Offsets& res_offsets) {
814
53
        const size_t offset_size = str_offsets.size();
815
53
        res_offsets.resize(offset_size);
816
53
        res_data.reserve(str_data.size());
817
818
53
        std::unordered_set<std::string_view> char_lookup;
819
53
        const char* remove_begin = remove_str.data;
820
53
        const char* remove_end = remove_str.data + remove_str.size;
821
822
240
        while (remove_begin < remove_end) {
823
187
            size_t byte_len, char_len;
824
187
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
825
187
                    remove_begin, remove_end, 1);
826
187
            char_lookup.insert(std::string_view(remove_begin, byte_len));
827
187
            remove_begin += byte_len;
828
187
        }
829
830
140
        for (size_t i = 0; i < offset_size; ++i) {
831
87
            const char* str_begin =
832
87
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
833
87
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
834
87
            const char* left_trim_pos = str_begin;
835
87
            const char* right_trim_pos = str_end;
836
837
87
            if constexpr (is_ltrim) {
838
81
                while (left_trim_pos < str_end) {
839
73
                    size_t byte_len, char_len;
840
73
                    std::tie(byte_len, char_len) =
841
73
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
842
73
                                                                                   str_end, 1);
843
73
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
844
73
                        char_lookup.end()) {
845
52
                        break;
846
52
                    }
847
21
                    left_trim_pos += byte_len;
848
21
                }
849
60
            }
850
851
87
            if constexpr (is_rtrim) {
852
88
                while (right_trim_pos > left_trim_pos) {
853
80
                    const char* prev_char_pos = right_trim_pos;
854
156
                    do {
855
156
                        --prev_char_pos;
856
156
                    } while ((*prev_char_pos & 0xC0) == 0x80);
857
80
                    size_t byte_len = right_trim_pos - prev_char_pos;
858
80
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
859
80
                        char_lookup.end()) {
860
52
                        break;
861
52
                    }
862
28
                    right_trim_pos = prev_char_pos;
863
28
                }
864
60
            }
865
866
87
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
867
            // The length of the result of the trim function will never exceed the length of the input.
868
87
            res_offsets[i] = (ColumnString::Offset)res_data.size();
869
87
        }
870
53
        return Status::OK();
871
53
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
813
19
                                    ColumnString::Offsets& res_offsets) {
814
19
        const size_t offset_size = str_offsets.size();
815
19
        res_offsets.resize(offset_size);
816
19
        res_data.reserve(str_data.size());
817
818
19
        std::unordered_set<std::string_view> char_lookup;
819
19
        const char* remove_begin = remove_str.data;
820
19
        const char* remove_end = remove_str.data + remove_str.size;
821
822
84
        while (remove_begin < remove_end) {
823
65
            size_t byte_len, char_len;
824
65
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
825
65
                    remove_begin, remove_end, 1);
826
65
            char_lookup.insert(std::string_view(remove_begin, byte_len));
827
65
            remove_begin += byte_len;
828
65
        }
829
830
52
        for (size_t i = 0; i < offset_size; ++i) {
831
33
            const char* str_begin =
832
33
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
833
33
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
834
33
            const char* left_trim_pos = str_begin;
835
33
            const char* right_trim_pos = str_end;
836
837
33
            if constexpr (is_ltrim) {
838
45
                while (left_trim_pos < str_end) {
839
41
                    size_t byte_len, char_len;
840
41
                    std::tie(byte_len, char_len) =
841
41
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
842
41
                                                                                   str_end, 1);
843
41
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
844
41
                        char_lookup.end()) {
845
29
                        break;
846
29
                    }
847
12
                    left_trim_pos += byte_len;
848
12
                }
849
33
            }
850
851
33
            if constexpr (is_rtrim) {
852
48
                while (right_trim_pos > left_trim_pos) {
853
44
                    const char* prev_char_pos = right_trim_pos;
854
90
                    do {
855
90
                        --prev_char_pos;
856
90
                    } while ((*prev_char_pos & 0xC0) == 0x80);
857
44
                    size_t byte_len = right_trim_pos - prev_char_pos;
858
44
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
859
44
                        char_lookup.end()) {
860
29
                        break;
861
29
                    }
862
15
                    right_trim_pos = prev_char_pos;
863
15
                }
864
33
            }
865
866
33
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
867
            // The length of the result of the trim function will never exceed the length of the input.
868
33
            res_offsets[i] = (ColumnString::Offset)res_data.size();
869
33
        }
870
19
        return Status::OK();
871
19
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
813
17
                                    ColumnString::Offsets& res_offsets) {
814
17
        const size_t offset_size = str_offsets.size();
815
17
        res_offsets.resize(offset_size);
816
17
        res_data.reserve(str_data.size());
817
818
17
        std::unordered_set<std::string_view> char_lookup;
819
17
        const char* remove_begin = remove_str.data;
820
17
        const char* remove_end = remove_str.data + remove_str.size;
821
822
78
        while (remove_begin < remove_end) {
823
61
            size_t byte_len, char_len;
824
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
825
61
                    remove_begin, remove_end, 1);
826
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
827
61
            remove_begin += byte_len;
828
61
        }
829
830
44
        for (size_t i = 0; i < offset_size; ++i) {
831
27
            const char* str_begin =
832
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
833
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
834
27
            const char* left_trim_pos = str_begin;
835
27
            const char* right_trim_pos = str_end;
836
837
27
            if constexpr (is_ltrim) {
838
36
                while (left_trim_pos < str_end) {
839
32
                    size_t byte_len, char_len;
840
32
                    std::tie(byte_len, char_len) =
841
32
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
842
32
                                                                                   str_end, 1);
843
32
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
844
32
                        char_lookup.end()) {
845
23
                        break;
846
23
                    }
847
9
                    left_trim_pos += byte_len;
848
9
                }
849
27
            }
850
851
            if constexpr (is_rtrim) {
852
                while (right_trim_pos > left_trim_pos) {
853
                    const char* prev_char_pos = right_trim_pos;
854
                    do {
855
                        --prev_char_pos;
856
                    } while ((*prev_char_pos & 0xC0) == 0x80);
857
                    size_t byte_len = right_trim_pos - prev_char_pos;
858
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
859
                        char_lookup.end()) {
860
                        break;
861
                    }
862
                    right_trim_pos = prev_char_pos;
863
                }
864
            }
865
866
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
867
            // The length of the result of the trim function will never exceed the length of the input.
868
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
869
27
        }
870
17
        return Status::OK();
871
17
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
813
17
                                    ColumnString::Offsets& res_offsets) {
814
17
        const size_t offset_size = str_offsets.size();
815
17
        res_offsets.resize(offset_size);
816
17
        res_data.reserve(str_data.size());
817
818
17
        std::unordered_set<std::string_view> char_lookup;
819
17
        const char* remove_begin = remove_str.data;
820
17
        const char* remove_end = remove_str.data + remove_str.size;
821
822
78
        while (remove_begin < remove_end) {
823
61
            size_t byte_len, char_len;
824
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
825
61
                    remove_begin, remove_end, 1);
826
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
827
61
            remove_begin += byte_len;
828
61
        }
829
830
44
        for (size_t i = 0; i < offset_size; ++i) {
831
27
            const char* str_begin =
832
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
833
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
834
27
            const char* left_trim_pos = str_begin;
835
27
            const char* right_trim_pos = str_end;
836
837
            if constexpr (is_ltrim) {
838
                while (left_trim_pos < str_end) {
839
                    size_t byte_len, char_len;
840
                    std::tie(byte_len, char_len) =
841
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
842
                                                                                   str_end, 1);
843
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
844
                        char_lookup.end()) {
845
                        break;
846
                    }
847
                    left_trim_pos += byte_len;
848
                }
849
            }
850
851
27
            if constexpr (is_rtrim) {
852
40
                while (right_trim_pos > left_trim_pos) {
853
36
                    const char* prev_char_pos = right_trim_pos;
854
66
                    do {
855
66
                        --prev_char_pos;
856
66
                    } while ((*prev_char_pos & 0xC0) == 0x80);
857
36
                    size_t byte_len = right_trim_pos - prev_char_pos;
858
36
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
859
36
                        char_lookup.end()) {
860
23
                        break;
861
23
                    }
862
13
                    right_trim_pos = prev_char_pos;
863
13
                }
864
27
            }
865
866
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
867
            // The length of the result of the trim function will never exceed the length of the input.
868
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
869
27
        }
870
17
        return Status::OK();
871
17
    }
872
};
873
// This is an implementation of a parameter for the Trim function.
874
template <bool is_ltrim, bool is_rtrim, typename Name>
875
struct Trim1Impl {
876
    static constexpr auto name = Name::name;
877
878
157
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
878
45
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
878
35
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
878
41
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
878
9
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
878
13
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
878
14
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
879
880
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
881
139
                          uint32_t result, size_t input_rows_count) {
882
139
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
883
139
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
884
139
            auto col_res = ColumnString::create();
885
139
            char blank[] = " ";
886
139
            const StringRef remove_str(blank, 1);
887
139
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
888
139
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
889
139
                    col_res->get_offsets())));
890
139
            block.replace_by_position(result, std::move(col_res));
891
139
        } else {
892
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
893
0
                                        block.get_by_position(arguments[0]).column->get_name(),
894
0
                                        name);
895
0
        }
896
139
        return Status::OK();
897
139
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
881
48
                          uint32_t result, size_t input_rows_count) {
882
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
883
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
884
48
            auto col_res = ColumnString::create();
885
48
            char blank[] = " ";
886
48
            const StringRef remove_str(blank, 1);
887
48
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
888
48
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
889
48
                    col_res->get_offsets())));
890
48
            block.replace_by_position(result, std::move(col_res));
891
48
        } else {
892
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
893
0
                                        block.get_by_position(arguments[0]).column->get_name(),
894
0
                                        name);
895
0
        }
896
48
        return Status::OK();
897
48
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
881
37
                          uint32_t result, size_t input_rows_count) {
882
37
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
883
37
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
884
37
            auto col_res = ColumnString::create();
885
37
            char blank[] = " ";
886
37
            const StringRef remove_str(blank, 1);
887
37
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
888
37
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
889
37
                    col_res->get_offsets())));
890
37
            block.replace_by_position(result, std::move(col_res));
891
37
        } else {
892
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
893
0
                                        block.get_by_position(arguments[0]).column->get_name(),
894
0
                                        name);
895
0
        }
896
37
        return Status::OK();
897
37
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
881
42
                          uint32_t result, size_t input_rows_count) {
882
42
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
883
42
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
884
42
            auto col_res = ColumnString::create();
885
42
            char blank[] = " ";
886
42
            const StringRef remove_str(blank, 1);
887
42
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
888
42
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
889
42
                    col_res->get_offsets())));
890
42
            block.replace_by_position(result, std::move(col_res));
891
42
        } else {
892
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
893
0
                                        block.get_by_position(arguments[0]).column->get_name(),
894
0
                                        name);
895
0
        }
896
42
        return Status::OK();
897
42
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
881
1
                          uint32_t result, size_t input_rows_count) {
882
1
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
883
1
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
884
1
            auto col_res = ColumnString::create();
885
1
            char blank[] = " ";
886
1
            const StringRef remove_str(blank, 1);
887
1
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
888
1
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
889
1
                    col_res->get_offsets())));
890
1
            block.replace_by_position(result, std::move(col_res));
891
1
        } else {
892
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
893
0
                                        block.get_by_position(arguments[0]).column->get_name(),
894
0
                                        name);
895
0
        }
896
1
        return Status::OK();
897
1
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
881
5
                          uint32_t result, size_t input_rows_count) {
882
5
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
883
5
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
884
5
            auto col_res = ColumnString::create();
885
5
            char blank[] = " ";
886
5
            const StringRef remove_str(blank, 1);
887
5
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
888
5
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
889
5
                    col_res->get_offsets())));
890
5
            block.replace_by_position(result, std::move(col_res));
891
5
        } else {
892
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
893
0
                                        block.get_by_position(arguments[0]).column->get_name(),
894
0
                                        name);
895
0
        }
896
5
        return Status::OK();
897
5
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
881
6
                          uint32_t result, size_t input_rows_count) {
882
6
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
883
6
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
884
6
            auto col_res = ColumnString::create();
885
6
            char blank[] = " ";
886
6
            const StringRef remove_str(blank, 1);
887
6
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
888
6
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
889
6
                    col_res->get_offsets())));
890
6
            block.replace_by_position(result, std::move(col_res));
891
6
        } else {
892
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
893
0
                                        block.get_by_position(arguments[0]).column->get_name(),
894
0
                                        name);
895
0
        }
896
6
        return Status::OK();
897
6
    }
898
};
899
900
// This is an implementation of two parameters for the Trim function.
901
template <bool is_ltrim, bool is_rtrim, typename Name>
902
struct Trim2Impl {
903
    static constexpr auto name = Name::name;
904
905
226
    static DataTypes get_variadic_argument_types() {
906
226
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
907
226
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
905
20
    static DataTypes get_variadic_argument_types() {
906
20
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
907
20
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
905
29
    static DataTypes get_variadic_argument_types() {
906
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
907
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
905
84
    static DataTypes get_variadic_argument_types() {
906
84
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
907
84
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
905
27
    static DataTypes get_variadic_argument_types() {
906
27
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
907
27
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
905
29
    static DataTypes get_variadic_argument_types() {
906
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
907
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
905
37
    static DataTypes get_variadic_argument_types() {
906
37
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
907
37
    }
908
909
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
910
281
                          uint32_t result, size_t input_rows_count) {
911
281
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
912
281
        const auto& rcol =
913
281
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
914
281
                        ->get_data_column_ptr();
915
282
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
916
282
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
917
281
                auto col_res = ColumnString::create();
918
281
                const auto* remove_str_raw = col_right->get_chars().data();
919
281
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
920
281
                const StringRef remove_str(remove_str_raw, remove_str_size);
921
922
281
                if (remove_str.size == 1) {
923
65
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
924
65
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
925
65
                            col_res->get_offsets())));
926
216
                } else {
927
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
928
                                  std::is_same<Name, NameLTrimIn>::value ||
929
121
                                  std::is_same<Name, NameRTrimIn>::value) {
930
121
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
931
121
                                col->get_chars(), col->get_offsets(), remove_str,
932
121
                                col_res->get_chars(), col_res->get_offsets())));
933
121
                    } else {
934
95
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
935
95
                                col->get_chars(), col->get_offsets(), remove_str,
936
95
                                col_res->get_chars(), col_res->get_offsets())));
937
95
                    }
938
216
                }
939
281
                block.replace_by_position(result, std::move(col_res));
940
281
            } else {
941
1
                return Status::RuntimeError("Illegal column {} of argument of function {}",
942
1
                                            block.get_by_position(arguments[1]).column->get_name(),
943
1
                                            name);
944
1
            }
945
946
18.4E
        } else {
947
18.4E
            return Status::RuntimeError("Illegal column {} of argument of function {}",
948
18.4E
                                        block.get_by_position(arguments[0]).column->get_name(),
949
18.4E
                                        name);
950
18.4E
        }
951
281
        return Status::OK();
952
281
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
910
25
                          uint32_t result, size_t input_rows_count) {
911
25
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
912
25
        const auto& rcol =
913
25
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
914
25
                        ->get_data_column_ptr();
915
26
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
916
26
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
917
25
                auto col_res = ColumnString::create();
918
25
                const auto* remove_str_raw = col_right->get_chars().data();
919
25
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
920
25
                const StringRef remove_str(remove_str_raw, remove_str_size);
921
922
25
                if (remove_str.size == 1) {
923
2
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
924
2
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
925
2
                            col_res->get_offsets())));
926
23
                } else {
927
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
928
                                  std::is_same<Name, NameLTrimIn>::value ||
929
                                  std::is_same<Name, NameRTrimIn>::value) {
930
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
931
                                col->get_chars(), col->get_offsets(), remove_str,
932
                                col_res->get_chars(), col_res->get_offsets())));
933
23
                    } else {
934
23
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
935
23
                                col->get_chars(), col->get_offsets(), remove_str,
936
23
                                col_res->get_chars(), col_res->get_offsets())));
937
23
                    }
938
23
                }
939
25
                block.replace_by_position(result, std::move(col_res));
940
25
            } else {
941
1
                return Status::RuntimeError("Illegal column {} of argument of function {}",
942
1
                                            block.get_by_position(arguments[1]).column->get_name(),
943
1
                                            name);
944
1
            }
945
946
18.4E
        } else {
947
18.4E
            return Status::RuntimeError("Illegal column {} of argument of function {}",
948
18.4E
                                        block.get_by_position(arguments[0]).column->get_name(),
949
18.4E
                                        name);
950
18.4E
        }
951
25
        return Status::OK();
952
25
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
910
32
                          uint32_t result, size_t input_rows_count) {
911
32
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
912
32
        const auto& rcol =
913
32
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
914
32
                        ->get_data_column_ptr();
915
32
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
916
32
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
917
32
                auto col_res = ColumnString::create();
918
32
                const auto* remove_str_raw = col_right->get_chars().data();
919
32
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
920
32
                const StringRef remove_str(remove_str_raw, remove_str_size);
921
922
32
                if (remove_str.size == 1) {
923
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
924
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
925
5
                            col_res->get_offsets())));
926
27
                } else {
927
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
928
                                  std::is_same<Name, NameLTrimIn>::value ||
929
                                  std::is_same<Name, NameRTrimIn>::value) {
930
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
931
                                col->get_chars(), col->get_offsets(), remove_str,
932
                                col_res->get_chars(), col_res->get_offsets())));
933
27
                    } else {
934
27
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
935
27
                                col->get_chars(), col->get_offsets(), remove_str,
936
27
                                col_res->get_chars(), col_res->get_offsets())));
937
27
                    }
938
27
                }
939
32
                block.replace_by_position(result, std::move(col_res));
940
32
            } else {
941
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
942
0
                                            block.get_by_position(arguments[1]).column->get_name(),
943
0
                                            name);
944
0
            }
945
946
32
        } else {
947
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
948
0
                                        block.get_by_position(arguments[0]).column->get_name(),
949
0
                                        name);
950
0
        }
951
32
        return Status::OK();
952
32
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
910
85
                          uint32_t result, size_t input_rows_count) {
911
85
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
912
85
        const auto& rcol =
913
85
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
914
85
                        ->get_data_column_ptr();
915
85
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
916
85
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
917
85
                auto col_res = ColumnString::create();
918
85
                const auto* remove_str_raw = col_right->get_chars().data();
919
85
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
920
85
                const StringRef remove_str(remove_str_raw, remove_str_size);
921
922
85
                if (remove_str.size == 1) {
923
40
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
924
40
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
925
40
                            col_res->get_offsets())));
926
45
                } else {
927
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
928
                                  std::is_same<Name, NameLTrimIn>::value ||
929
                                  std::is_same<Name, NameRTrimIn>::value) {
930
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
931
                                col->get_chars(), col->get_offsets(), remove_str,
932
                                col_res->get_chars(), col_res->get_offsets())));
933
45
                    } else {
934
45
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
935
45
                                col->get_chars(), col->get_offsets(), remove_str,
936
45
                                col_res->get_chars(), col_res->get_offsets())));
937
45
                    }
938
45
                }
939
85
                block.replace_by_position(result, std::move(col_res));
940
85
            } else {
941
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
942
0
                                            block.get_by_position(arguments[1]).column->get_name(),
943
0
                                            name);
944
0
            }
945
946
85
        } else {
947
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
948
0
                                        block.get_by_position(arguments[0]).column->get_name(),
949
0
                                        name);
950
0
        }
951
85
        return Status::OK();
952
85
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
910
50
                          uint32_t result, size_t input_rows_count) {
911
50
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
912
50
        const auto& rcol =
913
50
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
914
50
                        ->get_data_column_ptr();
915
50
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
916
50
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
917
50
                auto col_res = ColumnString::create();
918
50
                const auto* remove_str_raw = col_right->get_chars().data();
919
50
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
920
50
                const StringRef remove_str(remove_str_raw, remove_str_size);
921
922
50
                if (remove_str.size == 1) {
923
7
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
924
7
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
925
7
                            col_res->get_offsets())));
926
43
                } else {
927
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
928
                                  std::is_same<Name, NameLTrimIn>::value ||
929
43
                                  std::is_same<Name, NameRTrimIn>::value) {
930
43
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
931
43
                                col->get_chars(), col->get_offsets(), remove_str,
932
43
                                col_res->get_chars(), col_res->get_offsets())));
933
                    } else {
934
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
935
                                col->get_chars(), col->get_offsets(), remove_str,
936
                                col_res->get_chars(), col_res->get_offsets())));
937
                    }
938
43
                }
939
50
                block.replace_by_position(result, std::move(col_res));
940
50
            } else {
941
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
942
0
                                            block.get_by_position(arguments[1]).column->get_name(),
943
0
                                            name);
944
0
            }
945
946
50
        } else {
947
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
948
0
                                        block.get_by_position(arguments[0]).column->get_name(),
949
0
                                        name);
950
0
        }
951
50
        return Status::OK();
952
50
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
910
41
                          uint32_t result, size_t input_rows_count) {
911
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
912
41
        const auto& rcol =
913
41
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
914
41
                        ->get_data_column_ptr();
915
41
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
916
41
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
917
41
                auto col_res = ColumnString::create();
918
41
                const auto* remove_str_raw = col_right->get_chars().data();
919
41
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
920
41
                const StringRef remove_str(remove_str_raw, remove_str_size);
921
922
41
                if (remove_str.size == 1) {
923
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
924
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
925
5
                            col_res->get_offsets())));
926
36
                } else {
927
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
928
                                  std::is_same<Name, NameLTrimIn>::value ||
929
36
                                  std::is_same<Name, NameRTrimIn>::value) {
930
36
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
931
36
                                col->get_chars(), col->get_offsets(), remove_str,
932
36
                                col_res->get_chars(), col_res->get_offsets())));
933
                    } else {
934
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
935
                                col->get_chars(), col->get_offsets(), remove_str,
936
                                col_res->get_chars(), col_res->get_offsets())));
937
                    }
938
36
                }
939
41
                block.replace_by_position(result, std::move(col_res));
940
41
            } else {
941
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
942
0
                                            block.get_by_position(arguments[1]).column->get_name(),
943
0
                                            name);
944
0
            }
945
946
41
        } else {
947
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
948
0
                                        block.get_by_position(arguments[0]).column->get_name(),
949
0
                                        name);
950
0
        }
951
41
        return Status::OK();
952
41
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
910
48
                          uint32_t result, size_t input_rows_count) {
911
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
912
48
        const auto& rcol =
913
48
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
914
48
                        ->get_data_column_ptr();
915
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
916
48
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
917
48
                auto col_res = ColumnString::create();
918
48
                const auto* remove_str_raw = col_right->get_chars().data();
919
48
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
920
48
                const StringRef remove_str(remove_str_raw, remove_str_size);
921
922
48
                if (remove_str.size == 1) {
923
6
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
924
6
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
925
6
                            col_res->get_offsets())));
926
42
                } else {
927
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
928
                                  std::is_same<Name, NameLTrimIn>::value ||
929
42
                                  std::is_same<Name, NameRTrimIn>::value) {
930
42
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
931
42
                                col->get_chars(), col->get_offsets(), remove_str,
932
42
                                col_res->get_chars(), col_res->get_offsets())));
933
                    } else {
934
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
935
                                col->get_chars(), col->get_offsets(), remove_str,
936
                                col_res->get_chars(), col_res->get_offsets())));
937
                    }
938
42
                }
939
48
                block.replace_by_position(result, std::move(col_res));
940
48
            } else {
941
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
942
0
                                            block.get_by_position(arguments[1]).column->get_name(),
943
0
                                            name);
944
0
            }
945
946
48
        } else {
947
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
948
0
                                        block.get_by_position(arguments[0]).column->get_name(),
949
0
                                        name);
950
0
        }
951
48
        return Status::OK();
952
48
    }
953
};
954
955
template <typename impl>
956
class FunctionTrim : public IFunction {
957
public:
958
    static constexpr auto name = impl::name;
959
395
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
959
46
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
959
36
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
959
42
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
959
21
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
959
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
959
85
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
959
10
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
959
14
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
959
15
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
959
28
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
959
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
959
38
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
960
12
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
960
1
    String get_name() const override { return impl::name; }
961
962
287
    size_t get_number_of_arguments() const override {
963
287
        return get_variadic_argument_types_impl().size();
964
287
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
962
37
    size_t get_number_of_arguments() const override {
963
37
        return get_variadic_argument_types_impl().size();
964
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
962
27
    size_t get_number_of_arguments() const override {
963
27
        return get_variadic_argument_types_impl().size();
964
27
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
962
33
    size_t get_number_of_arguments() const override {
963
33
        return get_variadic_argument_types_impl().size();
964
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
962
12
    size_t get_number_of_arguments() const override {
963
12
        return get_variadic_argument_types_impl().size();
964
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
962
21
    size_t get_number_of_arguments() const override {
963
21
        return get_variadic_argument_types_impl().size();
964
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
962
76
    size_t get_number_of_arguments() const override {
963
76
        return get_variadic_argument_types_impl().size();
964
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
962
1
    size_t get_number_of_arguments() const override {
963
1
        return get_variadic_argument_types_impl().size();
964
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
962
5
    size_t get_number_of_arguments() const override {
963
5
        return get_variadic_argument_types_impl().size();
964
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
962
6
    size_t get_number_of_arguments() const override {
963
6
        return get_variadic_argument_types_impl().size();
964
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
962
19
    size_t get_number_of_arguments() const override {
963
19
        return get_variadic_argument_types_impl().size();
964
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
962
21
    size_t get_number_of_arguments() const override {
963
21
        return get_variadic_argument_types_impl().size();
964
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
962
29
    size_t get_number_of_arguments() const override {
963
29
        return get_variadic_argument_types_impl().size();
964
29
    }
965
966
287
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
287
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
287
        return arguments[0];
973
287
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
37
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
37
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
37
        return arguments[0];
973
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
27
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
27
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
27
        return arguments[0];
973
27
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
33
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
33
        return arguments[0];
973
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
12
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
12
        return arguments[0];
973
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
21
        return arguments[0];
973
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
76
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
76
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
76
        return arguments[0];
973
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
1
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
1
        return arguments[0];
973
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
5
        return arguments[0];
973
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
6
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
6
        return arguments[0];
973
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
19
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
19
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
19
        return arguments[0];
973
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
21
        return arguments[0];
973
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
966
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
967
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
968
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
969
0
                                   "Illegal type {} of argument of function {}",
970
0
                                   arguments[0]->get_name(), get_name());
971
0
        }
972
29
        return arguments[0];
973
29
    }
974
    // The second parameter of "trim" is a constant.
975
569
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
85
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
58
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
64
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
36
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
41
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
96
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
1
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
5
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
6
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
67
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
51
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
975
59
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
976
977
383
    DataTypes get_variadic_argument_types_impl() const override {
978
383
        return impl::get_variadic_argument_types();
979
383
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
45
    DataTypes get_variadic_argument_types_impl() const override {
978
45
        return impl::get_variadic_argument_types();
979
45
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
35
    DataTypes get_variadic_argument_types_impl() const override {
978
35
        return impl::get_variadic_argument_types();
979
35
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
41
    DataTypes get_variadic_argument_types_impl() const override {
978
41
        return impl::get_variadic_argument_types();
979
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
20
    DataTypes get_variadic_argument_types_impl() const override {
978
20
        return impl::get_variadic_argument_types();
979
20
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
29
    DataTypes get_variadic_argument_types_impl() const override {
978
29
        return impl::get_variadic_argument_types();
979
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
84
    DataTypes get_variadic_argument_types_impl() const override {
978
84
        return impl::get_variadic_argument_types();
979
84
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
9
    DataTypes get_variadic_argument_types_impl() const override {
978
9
        return impl::get_variadic_argument_types();
979
9
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
13
    DataTypes get_variadic_argument_types_impl() const override {
978
13
        return impl::get_variadic_argument_types();
979
13
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
14
    DataTypes get_variadic_argument_types_impl() const override {
978
14
        return impl::get_variadic_argument_types();
979
14
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
27
    DataTypes get_variadic_argument_types_impl() const override {
978
27
        return impl::get_variadic_argument_types();
979
27
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
29
    DataTypes get_variadic_argument_types_impl() const override {
978
29
        return impl::get_variadic_argument_types();
979
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
977
37
    DataTypes get_variadic_argument_types_impl() const override {
978
37
        return impl::get_variadic_argument_types();
979
37
    }
980
981
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
982
420
                        uint32_t result, size_t input_rows_count) const override {
983
420
        return impl::execute(context, block, arguments, result, input_rows_count);
984
420
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
48
                        uint32_t result, size_t input_rows_count) const override {
983
48
        return impl::execute(context, block, arguments, result, input_rows_count);
984
48
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
37
                        uint32_t result, size_t input_rows_count) const override {
983
37
        return impl::execute(context, block, arguments, result, input_rows_count);
984
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
42
                        uint32_t result, size_t input_rows_count) const override {
983
42
        return impl::execute(context, block, arguments, result, input_rows_count);
984
42
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
25
                        uint32_t result, size_t input_rows_count) const override {
983
25
        return impl::execute(context, block, arguments, result, input_rows_count);
984
25
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
32
                        uint32_t result, size_t input_rows_count) const override {
983
32
        return impl::execute(context, block, arguments, result, input_rows_count);
984
32
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
85
                        uint32_t result, size_t input_rows_count) const override {
983
85
        return impl::execute(context, block, arguments, result, input_rows_count);
984
85
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
1
                        uint32_t result, size_t input_rows_count) const override {
983
1
        return impl::execute(context, block, arguments, result, input_rows_count);
984
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
5
                        uint32_t result, size_t input_rows_count) const override {
983
5
        return impl::execute(context, block, arguments, result, input_rows_count);
984
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
6
                        uint32_t result, size_t input_rows_count) const override {
983
6
        return impl::execute(context, block, arguments, result, input_rows_count);
984
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
50
                        uint32_t result, size_t input_rows_count) const override {
983
50
        return impl::execute(context, block, arguments, result, input_rows_count);
984
50
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
41
                        uint32_t result, size_t input_rows_count) const override {
983
41
        return impl::execute(context, block, arguments, result, input_rows_count);
984
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
982
48
                        uint32_t result, size_t input_rows_count) const override {
983
48
        return impl::execute(context, block, arguments, result, input_rows_count);
984
48
    }
985
};
986
987
struct UnHexImplEmpty {
988
    static constexpr auto name = "unhex";
989
};
990
991
struct UnHexImplNull {
992
    static constexpr auto name = "unhex_null";
993
};
994
995
template <typename Name>
996
struct UnHexImpl {
997
    static constexpr auto name = Name::name;
998
    using ReturnType = DataTypeString;
999
    using ColumnType = ColumnString;
1000
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1001
1002
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1003
160
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1004
160
        auto rows_count = offsets.size();
1005
160
        dst_offsets.resize(rows_count);
1006
1007
160
        int64_t total_size = 0;
1008
368
        for (size_t i = 0; i < rows_count; i++) {
1009
208
            size_t len = offsets[i] - offsets[i - 1];
1010
208
            total_size += len / 2;
1011
208
        }
1012
160
        ColumnString::check_chars_length(total_size, rows_count);
1013
160
        dst_data.resize(total_size);
1014
160
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1015
160
        size_t offset = 0;
1016
1017
368
        for (int i = 0; i < rows_count; ++i) {
1018
208
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1019
208
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1020
1021
208
            if (UNLIKELY(srclen == 0)) {
1022
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1023
13
                continue;
1024
13
            }
1025
1026
195
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1027
1028
195
            offset += outlen;
1029
195
            dst_offsets[i] = cast_set<uint32_t>(offset);
1030
195
        }
1031
160
        dst_data.pop_back(total_size - offset);
1032
160
        return Status::OK();
1033
160
    }
1034
1035
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1036
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1037
33
                         ColumnUInt8::Container* null_map_data) {
1038
33
        auto rows_count = offsets.size();
1039
33
        dst_offsets.resize(rows_count);
1040
1041
33
        int64_t total_size = 0;
1042
84
        for (size_t i = 0; i < rows_count; i++) {
1043
51
            size_t len = offsets[i] - offsets[i - 1];
1044
51
            total_size += len / 2;
1045
51
        }
1046
33
        ColumnString::check_chars_length(total_size, rows_count);
1047
33
        dst_data.resize(total_size);
1048
33
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1049
33
        size_t offset = 0;
1050
1051
84
        for (int i = 0; i < rows_count; ++i) {
1052
51
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1053
51
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1054
1055
51
            if (UNLIKELY(srclen == 0)) {
1056
7
                (*null_map_data)[i] = 1;
1057
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1058
7
                continue;
1059
7
            }
1060
1061
44
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1062
1063
44
            if (outlen == 0) {
1064
13
                (*null_map_data)[i] = 1;
1065
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1066
13
                continue;
1067
13
            }
1068
1069
31
            offset += outlen;
1070
31
            dst_offsets[i] = cast_set<uint32_t>(offset);
1071
31
        }
1072
33
        dst_data.pop_back(total_size - offset);
1073
33
        return Status::OK();
1074
33
    }
1075
};
1076
1077
struct NameStringSpace {
1078
    static constexpr auto name = "space";
1079
};
1080
1081
struct StringSpace {
1082
    using ReturnType = DataTypeString;
1083
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_INT;
1084
    using Type = Int32;
1085
    using ReturnColumnType = ColumnString;
1086
1087
    static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data,
1088
10
                         ColumnString::Offsets& res_offsets) {
1089
10
        res_offsets.resize(data.size());
1090
10
        size_t input_size = res_offsets.size();
1091
10
        int64_t total_size = 0;
1092
34
        for (size_t i = 0; i < input_size; ++i) {
1093
24
            if (data[i] > 0) {
1094
14
                total_size += data[i];
1095
14
            }
1096
24
        }
1097
10
        ColumnString::check_chars_length(total_size, input_size);
1098
10
        res_data.reserve(total_size);
1099
1100
34
        for (size_t i = 0; i < input_size; ++i) {
1101
24
            if (data[i] > 0) [[likely]] {
1102
14
                res_data.resize_fill(res_data.size() + data[i], ' ');
1103
14
                cast_set(res_offsets[i], res_data.size());
1104
14
            } else {
1105
10
                StringOP::push_empty_string(i, res_data, res_offsets);
1106
10
            }
1107
24
        }
1108
10
        return Status::OK();
1109
10
    }
1110
};
1111
1112
struct ToBase64Impl {
1113
    static constexpr auto name = "to_base64";
1114
    using ReturnType = DataTypeString;
1115
    using ColumnType = ColumnString;
1116
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1117
1118
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1119
107
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1120
107
        auto rows_count = offsets.size();
1121
107
        dst_offsets.resize(rows_count);
1122
1123
107
        size_t total_size = 0;
1124
250
        for (size_t i = 0; i < rows_count; i++) {
1125
143
            size_t len = offsets[i] - offsets[i - 1];
1126
143
            total_size += 4 * ((len + 2) / 3);
1127
143
        }
1128
107
        ColumnString::check_chars_length(total_size, rows_count);
1129
107
        dst_data.resize(total_size);
1130
107
        auto* dst_data_ptr = dst_data.data();
1131
107
        size_t offset = 0;
1132
1133
250
        for (int i = 0; i < rows_count; ++i) {
1134
143
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1135
143
            size_t srclen = offsets[i] - offsets[i - 1];
1136
1137
143
            if (UNLIKELY(srclen == 0)) {
1138
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1139
7
                continue;
1140
7
            }
1141
1142
136
            auto outlen = doris::base64_encode((const unsigned char*)source, srclen,
1143
136
                                               (unsigned char*)(dst_data_ptr + offset));
1144
1145
136
            offset += outlen;
1146
136
            dst_offsets[i] = cast_set<uint32_t>(offset);
1147
136
        }
1148
107
        dst_data.pop_back(total_size - offset);
1149
107
        return Status::OK();
1150
107
    }
1151
};
1152
1153
struct FromBase64Impl {
1154
    static constexpr auto name = "from_base64";
1155
    using ReturnType = DataTypeString;
1156
    using ColumnType = ColumnString;
1157
1158
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1159
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1160
109
                         NullMap& null_map) {
1161
109
        auto rows_count = offsets.size();
1162
109
        dst_offsets.resize(rows_count);
1163
1164
109
        size_t total_size = 0;
1165
271
        for (size_t i = 0; i < rows_count; i++) {
1166
162
            auto len = offsets[i] - offsets[i - 1];
1167
162
            total_size += len / 4 * 3;
1168
162
        }
1169
109
        ColumnString::check_chars_length(total_size, rows_count);
1170
109
        dst_data.resize(total_size);
1171
109
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1172
109
        size_t offset = 0;
1173
1174
271
        for (int i = 0; i < rows_count; ++i) {
1175
162
            if (UNLIKELY(null_map[i])) {
1176
0
                null_map[i] = 1;
1177
0
                dst_offsets[i] = cast_set<uint32_t>(offset);
1178
0
                continue;
1179
0
            }
1180
1181
162
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1182
162
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1183
1184
162
            if (UNLIKELY(srclen == 0)) {
1185
6
                dst_offsets[i] = cast_set<uint32_t>(offset);
1186
6
                continue;
1187
6
            }
1188
1189
156
            auto outlen = base64_decode(source, srclen, dst_data_ptr + offset);
1190
1191
156
            if (outlen < 0) {
1192
60
                null_map[i] = 1;
1193
60
                dst_offsets[i] = cast_set<uint32_t>(offset);
1194
96
            } else {
1195
96
                offset += outlen;
1196
96
                dst_offsets[i] = cast_set<uint32_t>(offset);
1197
96
            }
1198
156
        }
1199
109
        dst_data.pop_back(total_size - offset);
1200
109
        return Status::OK();
1201
109
    }
1202
};
1203
1204
struct StringAppendTrailingCharIfAbsent {
1205
    static constexpr auto name = "append_trailing_char_if_absent";
1206
    using Chars = ColumnString::Chars;
1207
    using Offsets = ColumnString::Offsets;
1208
    using ReturnType = DataTypeString;
1209
    using ColumnType = ColumnString;
1210
1211
48
    static bool str_end_with(const StringRef& str, const StringRef& end) {
1212
48
        if (str.size < end.size) {
1213
11
            return false;
1214
11
        }
1215
        // The end_with method of StringRef needs to ensure that the size of end is less than or equal to the size of str.
1216
37
        return str.end_with(end);
1217
48
    }
1218
1219
    static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1220
                              const Chars& rdata, const Offsets& roffsets, Chars& res_data,
1221
56
                              Offsets& res_offsets, NullMap& null_map_data) {
1222
56
        DCHECK_EQ(loffsets.size(), roffsets.size());
1223
56
        size_t input_rows_count = loffsets.size();
1224
56
        res_offsets.resize(input_rows_count);
1225
56
        fmt::memory_buffer buffer;
1226
1227
158
        for (size_t i = 0; i < input_rows_count; ++i) {
1228
102
            buffer.clear();
1229
1230
102
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1231
102
                                       loffsets[i] - loffsets[i - 1]);
1232
102
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1233
102
                                       roffsets[i] - roffsets[i - 1]);
1234
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1235
102
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1236
102
                    rstr.begin(), rstr.end(), 2);
1237
1238
102
            if (char_len != 1) {
1239
66
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1240
66
                continue;
1241
66
            }
1242
36
            if (str_end_with(lstr, rstr)) {
1243
9
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1244
9
                continue;
1245
9
            }
1246
1247
27
            buffer.append(lstr.begin(), lstr.end());
1248
27
            buffer.append(rstr.begin(), rstr.end());
1249
27
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1250
27
                                        res_offsets);
1251
27
        }
1252
56
    }
1253
    static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1254
                              const StringRef& rstr, Chars& res_data, Offsets& res_offsets,
1255
8
                              NullMap& null_map_data) {
1256
8
        size_t input_rows_count = loffsets.size();
1257
8
        res_offsets.resize(input_rows_count);
1258
8
        fmt::memory_buffer buffer;
1259
        // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1260
8
        auto [byte_len, char_len] =
1261
8
                simd::VStringFunctions::iterate_utf8_with_limit_length(rstr.begin(), rstr.end(), 2);
1262
8
        if (char_len != 1) {
1263
4
            for (size_t i = 0; i < input_rows_count; ++i) {
1264
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1265
2
            }
1266
2
            return;
1267
2
        }
1268
1269
12
        for (size_t i = 0; i < input_rows_count; ++i) {
1270
6
            buffer.clear();
1271
6
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1272
6
                                       loffsets[i] - loffsets[i - 1]);
1273
1274
6
            if (str_end_with(lstr, rstr)) {
1275
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1276
2
                continue;
1277
2
            }
1278
1279
4
            buffer.append(lstr.begin(), lstr.end());
1280
4
            buffer.append(rstr.begin(), rstr.end());
1281
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1282
4
                                        res_offsets);
1283
4
        }
1284
6
    }
1285
    static void scalar_vector(FunctionContext* context, const StringRef& lstr, const Chars& rdata,
1286
                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
1287
8
                              NullMap& null_map_data) {
1288
8
        size_t input_rows_count = roffsets.size();
1289
8
        res_offsets.resize(input_rows_count);
1290
8
        fmt::memory_buffer buffer;
1291
1292
16
        for (size_t i = 0; i < input_rows_count; ++i) {
1293
8
            buffer.clear();
1294
1295
8
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1296
8
                                       roffsets[i] - roffsets[i - 1]);
1297
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1298
8
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1299
8
                    rstr.begin(), rstr.end(), 2);
1300
1301
8
            if (char_len != 1) {
1302
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1303
2
                continue;
1304
2
            }
1305
6
            if (str_end_with(lstr, rstr)) {
1306
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1307
2
                continue;
1308
2
            }
1309
1310
4
            buffer.append(lstr.begin(), lstr.end());
1311
4
            buffer.append(rstr.begin(), rstr.end());
1312
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1313
4
                                        res_offsets);
1314
4
        }
1315
8
    }
1316
};
1317
1318
struct StringLPad {
1319
    static constexpr auto name = "lpad";
1320
    static constexpr auto is_lpad = true;
1321
};
1322
1323
// Compile-time traits for the "rpad" function: supplies the function name and sets
// is_lpad to false. Used in this file as the template argument of FunctionStringPad
// (see the FunctionStringRPad alias).
struct StringRPad {
1324
    static constexpr auto name = "rpad";
1325
    static constexpr auto is_lpad = false;
1326
};
1327
1328
// Alias templates that fix the third argument of StringFunctionImpl to a specific operation
// (StartsWithOp, EndsWithOp, FindInSetOp) while leaving the left/right data types open.
// They are passed as template-template arguments to FunctionBinaryToType further down.
template <typename LeftDataType, typename RightDataType>
1329
using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;
1330
1331
template <typename LeftDataType, typename RightDataType>
1332
using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;
1333
1334
template <typename LeftDataType, typename RightDataType>
1335
using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
1336
1337
// Function types ready for registration
1338
// Single-argument functions: each alias instantiates FunctionUnaryToType with an
// implementation type and a name type. All of them except FunctionIsValidUTF8's alias name
// are registered directly in register_function_string(); FunctionStringLength and
// FunctionStringUTF8Length additionally get the aliases "octet_length" and
// "character_length" there.
using FunctionStringParseDataSize = FunctionUnaryToType<ParseDataSize, NameParseDataSize>;
1339
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
1340
using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLength>;
1341
using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
1342
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
1343
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
1344
using FunctionIsValidUTF8 = FunctionUnaryToType<IsValidUTF8Impl, NameIsValidUTF8>;
1345
// starts_with over two string arguments. Unlike the sibling FunctionStringEndsWith, which is a
// plain alias of FunctionBinaryToType, this is a derived class so that it can provide its own
// create(), is_deterministic() and evaluate_zonemap_filter().
class FunctionStringStartsWith : public FunctionBinaryToType<DataTypeString, DataTypeString,
1346
                                                             StringStartsWithImpl, NameStartsWith> {
1347
public:
1348
104
    // Factory: returns a new shared instance of this derived class.
    static FunctionPtr create() { return std::make_shared<FunctionStringStartsWith>(); }
1349
2
    // Always reports the function as deterministic.
    bool is_deterministic() const override { return true; }
1350
    // Zone-map filtering is delegated unchanged to segment_filter::eval_starts_with_zonemap.
    ZoneMapFilterResult evaluate_zonemap_filter(const ZoneMapEvalContext& ctx,
1351
2
                                                const VExprSPtrs& arguments) const override {
1352
2
        return segment_filter::eval_starts_with_zonemap(ctx, arguments);
1353
2
    }
1354
};
1355
1356
// Two-argument string functions: FunctionBinaryToType over (DataTypeString, DataTypeString)
// with an implementation template and a name type.
using FunctionStringEndsWith =
1357
        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
1358
using FunctionStringInstr =
1359
        FunctionBinaryToType<DataTypeString, DataTypeString, StringInStrImpl, NameInstr>;
1360
using FunctionStringLocate =
1361
        FunctionBinaryToType<DataTypeString, DataTypeString, StringLocateImpl, NameLocate>;
1362
using FunctionStringFindInSet =
1363
        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;
1364
1365
// String-to-string functions: FunctionStringToString with an implementation and a name type.
using FunctionQuote = FunctionStringToString<NameQuoteImpl, NameQuote>;
1366
1367
using FunctionToLower = FunctionStringToString<TransferImpl<NameToLower>, NameToLower>;
1368
1369
using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameToUpper>;
1370
1371
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
1372
1373
// Hex / base64 conversions. The bool argument of FunctionStringEncode is true only for the
// UnHexImplNull variant -- presumably it selects a nullable result; confirm in
// FunctionStringEncode.
using FunctionUnHex = FunctionStringEncode<UnHexImpl<UnHexImplEmpty>, false>;
1374
using FunctionUnHexNullable = FunctionStringEncode<UnHexImpl<UnHexImplNull>, true>;
1375
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>;
1376
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
1377
1378
// Binary string function built on the StringAppendTrailingCharIfAbsent implementation struct.
using FunctionStringAppendTrailingCharIfAbsent =
1379
        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;
1380
1381
// lpad / rpad share FunctionStringPad and differ only in the traits struct passed in.
using FunctionStringLPad = FunctionStringPad<StringLPad>;
1382
using FunctionStringRPad = FunctionStringPad<StringRPad>;
1383
1384
// Registration helpers that are only declared here (their definitions are not in this part of
// the file); register_function_string() calls each of them, in this order, before registering
// the function types above.
extern void register_function_string_basic(SimpleFunctionFactory& factory);
1385
extern void register_function_string_digest(SimpleFunctionFactory& factory);
1386
extern void register_function_string_mask(SimpleFunctionFactory& factory);
1387
extern void register_function_string_misc(SimpleFunctionFactory& factory);
1388
extern void register_function_string_search(SimpleFunctionFactory& factory);
1389
extern void register_function_string_url(SimpleFunctionFactory& factory);
1390
1391
8
// Registers the string functions with `factory` in three steps:
//   1. calls the six register_function_string_* helpers declared above;
//   2. registers each function type via factory.register_function<T>();
//   3. registers alternative names via factory.register_alias(existing_name, alias).
void register_function_string(SimpleFunctionFactory& factory) {
1392
8
    // Step 1: delegate to the per-category registration helpers.
    register_function_string_basic(factory);
1393
8
    register_function_string_digest(factory);
1394
8
    register_function_string_mask(factory);
1395
8
    register_function_string_misc(factory);
1396
8
    register_function_string_search(factory);
1397
8
    register_function_string_url(factory);
1398
1399
8
    // Step 2: register the individual function types.
    factory.register_function<FunctionStringParseDataSize>();
1400
8
    factory.register_function<FunctionStringASCII>();
1401
8
    factory.register_function<FunctionStringLength>();
1402
8
    factory.register_function<FunctionCrc32>();
1403
8
    factory.register_function<FunctionStringUTF8Length>();
1404
8
    factory.register_function<FunctionStringSpace>();
1405
8
    factory.register_function<FunctionStringStartsWith>();
1406
8
    factory.register_function<FunctionStringEndsWith>();
1407
8
    factory.register_function<FunctionStringInstr>();
1408
8
    factory.register_function<FunctionStringFindInSet>();
1409
8
    factory.register_function<FunctionStringLocate>();
1410
8
    factory.register_function<FunctionQuote>();
1411
8
    factory.register_function<FunctionReverseCommon>();
1412
8
    factory.register_function<FunctionUnHex>();
1413
8
    factory.register_function<FunctionUnHexNullable>();
1414
8
    factory.register_function<FunctionToLower>();
1415
8
    factory.register_function<FunctionToUpper>();
1416
8
    factory.register_function<FunctionToInitcap>();
1417
8
    // Trim family: Trim1Impl and Trim2Impl are each registered for the NameTrim / NameLTrim /
    // NameRTrim names and again for the NameTrimIn / NameLTrimIn / NameRTrimIn names. The two
    // bool arguments are (true, true), (true, false) and (false, true) respectively --
    // presumably "trim left" / "trim right" flags; confirm in the Trim*Impl definitions.
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrim>>>();
1418
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrim>>>();
1419
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrim>>>();
1420
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrim>>>();
1421
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrim>>>();
1422
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrim>>>();
1423
8
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrimIn>>>();
1424
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrimIn>>>();
1425
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrimIn>>>();
1426
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrimIn>>>();
1427
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrimIn>>>();
1428
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrimIn>>>();
1429
8
    factory.register_function<FunctionStringConcat>();
1430
8
    factory.register_function<FunctionStringElt>();
1431
8
    factory.register_function<FunctionStringConcatWs>();
1432
8
    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
1433
8
    factory.register_function<FunctionStringRepeat>();
1434
8
    factory.register_function<FunctionStringLPad>();
1435
8
    factory.register_function<FunctionStringRPad>();
1436
8
    factory.register_function<FunctionToBase64>();
1437
8
    factory.register_function<FunctionFromBase64>();
1438
8
    // FunctionMoneyFormat is registered once per implementation type (double, int64, int128
    // and five decimal types).
    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
1439
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
1440
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
1441
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMALV2>>>();
1442
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL32>>>();
1443
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL64>>>();
1444
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL128I>>>();
1445
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL256>>>();
1446
8
    // FunctionStringFormatRound is registered for the same set of implementation types.
    factory.register_function<FunctionStringFormatRound<FormatRoundDoubleImpl>>();
1447
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt64Impl>>();
1448
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt128Impl>>();
1449
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMALV2>>>();
1450
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL32>>>();
1451
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL64>>>();
1452
8
    factory.register_function<
1453
8
            FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL128I>>>();
1454
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL256>>>();
1455
8
    factory.register_function<FunctionReplace<ReplaceImpl, true>>();
1456
8
    factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
1457
8
    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
1458
8
    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
1459
8
    factory.register_function<FunctionOverlay>();
1460
8
    factory.register_function<FunctionIsValidUTF8>();
1461
1462
8
    // Step 3: alternative names; the first argument is the already-registered function name,
    // the second is the alias being added.
    factory.register_alias(FunctionIsValidUTF8::name, "isValidUTF8");
1463
8
    factory.register_alias(FunctionToLower::name, "lcase");
1464
8
    factory.register_alias(FunctionToUpper::name, "ucase");
1465
8
    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
1466
8
    factory.register_alias(FunctionStringLength::name, "octet_length");
1467
8
    factory.register_alias(FunctionOverlay::name, "insert");
1468
8
}
1469
1470
} // namespace doris