Coverage Report

Created: 2026-04-10 10:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include <ctype.h>
19
#include <math.h>
20
#include <re2/stringpiece.h>
21
#include <unicode/schriter.h>
22
#include <unicode/uchar.h>
23
#include <unicode/unistr.h>
24
#include <unicode/ustream.h>
25
26
#include <bitset>
27
#include <cstddef>
28
#include <cstdint>
29
#include <string_view>
30
31
#include "common/cast_set.h"
32
#include "common/status.h"
33
#include "core/column/column.h"
34
#include "core/column/column_string.h"
35
#include "core/pod_array_fwd.h"
36
#include "core/string_ref.h"
37
#include "exprs/function/function_reverse.h"
38
#include "exprs/function/function_string_concat.h"
39
#include "exprs/function/function_string_format.h"
40
#include "exprs/function/function_string_replace.h"
41
#include "exprs/function/function_string_to_string.h"
42
#include "exprs/function/function_totype.h"
43
#include "exprs/function/simple_function_factory.h"
44
#include "exprs/function/string_hex_util.h"
45
#include "util/string_search.hpp"
46
#include "util/url_coding.h"
47
48
namespace doris {
49
#include "common/compile_check_begin.h"
50
/// Name tag: exposes the function name "ascii" as a compile-time constant.
struct NameStringASCII {
    static constexpr const char* name = "ascii";
};
53
54
struct StringASCII {
55
    using ReturnType = DataTypeInt32;
56
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
57
    using Type = String;
58
    using ReturnColumnType = ColumnInt32;
59
60
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
61
55
                         PaddedPODArray<Int32>& res) {
62
55
        auto size = offsets.size();
63
55
        res.resize(size);
64
154
        for (int i = 0; i < size; ++i) {
65
99
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
66
99
            res[i] = (offsets[i] == offsets[i - 1]) ? 0 : static_cast<uint8_t>(raw_str[0]);
67
99
        }
68
55
        return Status::OK();
69
55
    }
70
};
71
72
/// Name tag: exposes the function name "parse_data_size" as a compile-time constant.
struct NameParseDataSize {
    static constexpr const char* name = "parse_data_size";
};
75
76
static const std::map<std::string_view, Int128> UNITS = {
77
        {"B", static_cast<Int128>(1)},        {"kB", static_cast<Int128>(1) << 10},
78
        {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30},
79
        {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50},
80
        {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70},
81
        {"YB", static_cast<Int128>(1) << 80}};
82
83
struct ParseDataSize {
84
    using ReturnType = DataTypeInt128;
85
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
86
    using Type = String;
87
    using ReturnColumnType = ColumnInt128;
88
89
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
90
48
                         PaddedPODArray<Int128>& res) {
91
48
        auto size = offsets.size();
92
48
        res.resize(size);
93
100
        for (int i = 0; i < size; ++i) {
94
52
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
95
52
            int str_size = offsets[i] - offsets[i - 1];
96
52
            res[i] = parse_data_size(std::string_view(raw_str, str_size));
97
52
        }
98
48
        return Status::OK();
99
48
    }
100
101
52
    static Int128 parse_data_size(const std::string_view& dataSize) {
102
52
        int digit_length = 0;
103
216
        for (char c : dataSize) {
104
216
            if (isdigit(c) || c == '.') {
105
166
                digit_length++;
106
166
            } else {
107
50
                break;
108
50
            }
109
216
        }
110
111
52
        if (digit_length == 0) {
112
4
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
113
4
                                   "Invalid Input argument \"{}\" of function parse_data_size",
114
4
                                   dataSize);
115
4
        }
116
        // 123.45MB--->123.45 : MB
117
48
        double value = 0.0;
118
48
        try {
119
48
            value = std::stod(std::string(dataSize.substr(0, digit_length)));
120
48
        } catch (const std::exception& e) {
121
0
            throw doris::Exception(
122
0
                    ErrorCode::INVALID_ARGUMENT,
123
0
                    "Invalid Input argument \"{}\" of function parse_data_size, error: {}",
124
0
                    dataSize, e.what());
125
0
        }
126
48
        auto unit = dataSize.substr(digit_length);
127
48
        auto it = UNITS.find(unit);
128
48
        if (it != UNITS.end()) {
129
45
            return static_cast<__int128>(static_cast<long double>(it->second) * value);
130
45
        } else {
131
3
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
132
3
                                   "Invalid Input argument \"{}\" of function parse_data_size",
133
3
                                   dataSize);
134
3
        }
135
48
    }
136
};
137
138
/// Name tag: exposes the function name "quote" as a compile-time constant.
struct NameQuote {
    static constexpr const char* name = "quote";
};
141
142
struct NameQuoteImpl {
143
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
144
17
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
145
17
        size_t offset_size = offsets.size();
146
17
        ColumnString::Offset pos = 0;
147
17
        res_offsets.resize(offset_size);
148
17
        res_data.resize(data.size() + offset_size * 2);
149
45
        for (int i = 0; i < offset_size; i++) {
150
28
            const unsigned char* raw_str = &data[offsets[i - 1]];
151
28
            ColumnString::Offset size = offsets[i] - offsets[i - 1];
152
28
            res_data[pos] = '\'';
153
28
            std::memcpy(res_data.data() + pos + 1, raw_str, size);
154
28
            res_data[pos + size + 1] = '\'';
155
28
            pos += size + 2;
156
28
            res_offsets[i] = pos;
157
28
        }
158
17
        return Status::OK();
159
17
    }
160
};
161
162
/// Name tag: exposes the function name "length" as a compile-time constant.
struct NameStringLength {
    static constexpr const char* name = "length";
};
165
166
struct StringLengthImpl {
167
    using ReturnType = DataTypeInt32;
168
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
169
    using Type = String;
170
    using ReturnColumnType = ColumnInt32;
171
172
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
173
5.67k
                         PaddedPODArray<Int32>& res) {
174
5.67k
        auto size = offsets.size();
175
5.67k
        res.resize(size);
176
4.37M
        for (int i = 0; i < size; ++i) {
177
4.36M
            int str_size = offsets[i] - offsets[i - 1];
178
4.36M
            res[i] = str_size;
179
4.36M
        }
180
5.67k
        return Status::OK();
181
5.67k
    }
182
};
183
184
/// Name tag: exposes the function name "crc32" as a compile-time constant.
struct NameCrc32 {
    static constexpr const char* name = "crc32";
};
187
188
struct Crc32Impl {
189
    using ReturnType = DataTypeInt64;
190
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
191
    using Type = String;
192
    using ReturnColumnType = ColumnInt64;
193
194
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
195
3
                         PaddedPODArray<Int64>& res) {
196
3
        auto size = offsets.size();
197
3
        res.resize(size);
198
6
        for (int i = 0; i < size; ++i) {
199
3
            res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1],
200
3
                             offsets[i] - offsets[i - 1]);
201
3
        }
202
3
        return Status::OK();
203
3
    }
204
};
205
206
/// Name tag: exposes the function name "char_length" as a compile-time constant.
struct NameStringUtf8Length {
    static constexpr const char* name = "char_length";
};
209
210
struct StringUtf8LengthImpl {
211
    using ReturnType = DataTypeInt32;
212
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
213
    using Type = String;
214
    using ReturnColumnType = ColumnInt32;
215
216
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
217
50
                         PaddedPODArray<Int32>& res) {
218
50
        auto size = offsets.size();
219
50
        res.resize(size);
220
144
        for (int i = 0; i < size; ++i) {
221
94
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
222
94
            int str_size = offsets[i] - offsets[i - 1];
223
94
            res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size);
224
94
        }
225
50
        return Status::OK();
226
50
    }
227
};
228
229
/// Name tag: exposes the function name "starts_with" as a compile-time constant.
struct NameStartsWith {
    static constexpr const char* name = "starts_with";
};
232
233
struct StartsWithOp {
234
    using ResultDataType = DataTypeUInt8;
235
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
236
237
135
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
238
135
        res = strl.starts_with(strr);
239
135
    }
240
};
241
242
/// Name tag: exposes the function name "ends_with" as a compile-time constant.
struct NameEndsWith {
    static constexpr const char* name = "ends_with";
};
245
246
struct EndsWithOp {
247
    using ResultDataType = DataTypeUInt8;
248
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
249
250
142
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
251
142
        res = strl.ends_with(strr);
252
142
    }
253
};
254
255
/// Name tag: exposes the function name "find_in_set" as a compile-time constant.
struct NameFindInSet {
    static constexpr const char* name = "find_in_set";
};
258
259
struct FindInSetOp {
260
    using ResultDataType = DataTypeInt32;
261
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
262
170
    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
263
670
        for (const auto& c : strl) {
264
670
            if (c == ',') {
265
21
                res = 0;
266
21
                return;
267
21
            }
268
670
        }
269
270
149
        int32_t token_index = 1;
271
149
        int32_t start = 0;
272
149
        int32_t end;
273
274
253
        do {
275
253
            end = start;
276
            // Position end.
277
1.05k
            while (end < strr.length() && strr[end] != ',') {
278
806
                ++end;
279
806
            }
280
281
253
            if (strl == std::string_view {strr.data() + start, (size_t)end - start}) {
282
93
                res = token_index;
283
93
                return;
284
93
            }
285
286
            // Re-position start and end past ','
287
160
            start = end + 1;
288
160
            ++token_index;
289
160
        } while (start < strr.length());
290
56
        res = 0;
291
56
    }
292
};
293
294
/// Name tag: exposes the function name "instr" as a compile-time constant.
struct NameInstr {
    static constexpr const char* name = "instr";
};
297
298
// LeftDataType and RightDataType are DataTypeString
299
template <typename LeftDataType, typename RightDataType>
300
struct StringInStrImpl {
301
    using ResultDataType = DataTypeInt32;
302
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
303
304
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
305
72
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
306
72
        StringRef lstr_ref(ldata.data, ldata.size);
307
308
72
        auto size = roffsets.size();
309
72
        res.resize(size);
310
144
        for (int i = 0; i < size; ++i) {
311
72
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
312
72
            int r_str_size = roffsets[i] - roffsets[i - 1];
313
314
72
            StringRef rstr_ref(r_raw_str, r_str_size);
315
316
72
            res[i] = execute(lstr_ref, rstr_ref);
317
72
        }
318
319
72
        return Status::OK();
320
72
    }
321
322
    static Status vector_scalar(const ColumnString::Chars& ldata,
323
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
324
86
                                ResultPaddedPODArray& res) {
325
86
        auto size = loffsets.size();
326
86
        res.resize(size);
327
328
86
        if (rdata.size == 0) {
329
12
            std::fill(res.begin(), res.end(), 1);
330
12
            return Status::OK();
331
12
        }
332
333
74
        const UInt8* begin = ldata.data();
334
74
        const UInt8* end = begin + ldata.size();
335
74
        const UInt8* pos = begin;
336
337
        /// Current index in the array of strings.
338
74
        size_t i = 0;
339
74
        std::fill(res.begin(), res.end(), 0);
340
341
74
        StringRef rstr_ref(rdata.data, rdata.size);
342
74
        StringSearch search(&rstr_ref);
343
344
90
        while (pos < end) {
345
            // search return matched substring start offset
346
64
            pos = (UInt8*)search.search((char*)pos, end - pos);
347
64
            if (pos >= end) {
348
48
                break;
349
48
            }
350
351
            /// Determine which index it refers to.
352
            /// begin + value_offsets[i] is the start offset of string at i+1
353
16
            while (begin + loffsets[i] < pos) {
354
0
                ++i;
355
0
            }
356
357
            /// We check that the entry does not pass through the boundaries of strings.
358
16
            if (pos + rdata.size <= begin + loffsets[i]) {
359
16
                int loc = (int)(pos - begin) - loffsets[i - 1];
360
16
                int l_str_size = loffsets[i] - loffsets[i - 1];
361
16
                auto len = std::min(l_str_size, loc);
362
16
                loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len);
363
16
                res[i] = loc + 1;
364
16
            }
365
366
            // move to next string offset
367
16
            pos = begin + loffsets[i];
368
16
            ++i;
369
16
        }
370
371
74
        return Status::OK();
372
86
    }
373
374
    static Status vector_vector(const ColumnString::Chars& ldata,
375
                                const ColumnString::Offsets& loffsets,
376
                                const ColumnString::Chars& rdata,
377
207
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
378
207
        DCHECK_EQ(loffsets.size(), roffsets.size());
379
380
207
        auto size = loffsets.size();
381
207
        res.resize(size);
382
661
        for (int i = 0; i < size; ++i) {
383
454
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
384
454
            int l_str_size = loffsets[i] - loffsets[i - 1];
385
454
            StringRef lstr_ref(l_raw_str, l_str_size);
386
387
454
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
388
454
            int r_str_size = roffsets[i] - roffsets[i - 1];
389
454
            StringRef rstr_ref(r_raw_str, r_str_size);
390
391
454
            res[i] = execute(lstr_ref, rstr_ref);
392
454
        }
393
394
207
        return Status::OK();
395
207
    }
396
397
526
    static int execute(const StringRef& strl, const StringRef& strr) {
398
526
        if (strr.size == 0) {
399
71
            return 1;
400
71
        }
401
402
455
        StringSearch search(&strr);
403
        // Hive returns positions starting from 1.
404
455
        int loc = search.search(&strl);
405
455
        if (loc > 0) {
406
43
            int len = std::min(loc, (int)strl.size);
407
43
            loc = simd::VStringFunctions::get_char_len(strl.data, len);
408
43
        }
409
410
455
        return loc + 1;
411
526
    }
412
};
413
414
// Name tag for "locate", which uses the same implementation as instr.
struct NameLocate {
    static constexpr const char* name = "locate";
};
418
419
// LeftDataType and RightDataType are DataTypeString
420
template <typename LeftDataType, typename RightDataType>
421
struct StringLocateImpl {
422
    using ResultDataType = DataTypeInt32;
423
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
424
425
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
426
38
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
427
38
        return StringInStrImpl<LeftDataType, RightDataType>::vector_scalar(rdata, roffsets, ldata,
428
38
                                                                           res);
429
38
    }
430
431
    static Status vector_scalar(const ColumnString::Chars& ldata,
432
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
433
36
                                ResultPaddedPODArray& res) {
434
36
        return StringInStrImpl<LeftDataType, RightDataType>::scalar_vector(rdata, ldata, loffsets,
435
36
                                                                           res);
436
36
    }
437
438
    static Status vector_vector(const ColumnString::Chars& ldata,
439
                                const ColumnString::Offsets& loffsets,
440
                                const ColumnString::Chars& rdata,
441
126
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
442
126
        return StringInStrImpl<LeftDataType, RightDataType>::vector_vector(rdata, roffsets, ldata,
443
126
                                                                           loffsets, res);
444
126
    }
445
};
446
447
// LeftDataType and RightDataType are DataTypeString
448
template <typename LeftDataType, typename RightDataType, typename OP>
449
struct StringFunctionImpl {
450
    using ResultDataType = typename OP::ResultDataType;
451
    using ResultPaddedPODArray = typename OP::ResultPaddedPODArray;
452
453
    /// Applies OP::execute to every pair of rows taken from two string columns.
    ///
    /// @param ldata,loffsets bytes and end offsets of the left column
    /// @param rdata,roffsets bytes and end offsets of the right column; must have the same
    ///                       number of rows as the left column
    /// @param res            resized to one entry per row; OP::execute writes each entry
    /// @return always Status::OK()
    static Status vector_vector(const ColumnString::Chars& ldata,
                                const ColumnString::Offsets& loffsets,
                                const ColumnString::Chars& rdata,
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
        DCHECK_EQ(loffsets.size(), roffsets.size());

        const size_t row_count = loffsets.size();
        res.resize(row_count);
        // size_t index: same signedness as the row count it is compared with.
        for (size_t row = 0; row < row_count; ++row) {
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[row - 1]]);
            const size_t l_str_size = loffsets[row] - loffsets[row - 1];

            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[row - 1]]);
            const size_t r_str_size = roffsets[row] - roffsets[row - 1];

            const std::string_view lview(l_raw_str, l_str_size);
            const std::string_view rview(r_raw_str, r_str_size);

            OP::execute(lview, rview, res[row]);
        }
        return Status::OK();
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
456
88
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
457
88
        DCHECK_EQ(loffsets.size(), roffsets.size());
458
459
88
        auto size = loffsets.size();
460
88
        res.resize(size);
461
215
        for (int i = 0; i < size; ++i) {
462
127
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
463
127
            int l_str_size = loffsets[i] - loffsets[i - 1];
464
465
127
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
466
127
            int r_str_size = roffsets[i] - roffsets[i - 1];
467
468
127
            std::string_view lview(l_raw_str, l_str_size);
469
127
            std::string_view rview(r_raw_str, r_str_size);
470
471
127
            OP::execute(lview, rview, res[i]);
472
127
        }
473
88
        return Status::OK();
474
88
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
456
61
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
457
61
        DCHECK_EQ(loffsets.size(), roffsets.size());
458
459
61
        auto size = loffsets.size();
460
61
        res.resize(size);
461
175
        for (int i = 0; i < size; ++i) {
462
114
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
463
114
            int l_str_size = loffsets[i] - loffsets[i - 1];
464
465
114
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
466
114
            int r_str_size = roffsets[i] - roffsets[i - 1];
467
468
114
            std::string_view lview(l_raw_str, l_str_size);
469
114
            std::string_view rview(r_raw_str, r_str_size);
470
471
114
            OP::execute(lview, rview, res[i]);
472
114
        }
473
61
        return Status::OK();
474
61
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
456
64
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
457
64
        DCHECK_EQ(loffsets.size(), roffsets.size());
458
459
64
        auto size = loffsets.size();
460
64
        res.resize(size);
461
186
        for (int i = 0; i < size; ++i) {
462
122
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
463
122
            int l_str_size = loffsets[i] - loffsets[i - 1];
464
465
122
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
466
122
            int r_str_size = roffsets[i] - roffsets[i - 1];
467
468
122
            std::string_view lview(l_raw_str, l_str_size);
469
122
            std::string_view rview(r_raw_str, r_str_size);
470
471
122
            OP::execute(lview, rview, res[i]);
472
122
        }
473
64
        return Status::OK();
474
64
    }
475
    /// Applies OP::execute to every row of the left column paired with one constant right
    /// operand.
    ///
    /// @param ldata,loffsets bytes and end offsets of the left column
    /// @param rdata          the constant right operand
    /// @param res            resized to one entry per row; OP::execute writes each entry
    /// @return always Status::OK()
    static Status vector_scalar(const ColumnString::Chars& ldata,
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
                                ResultPaddedPODArray& res) {
        const size_t row_count = loffsets.size();
        res.resize(row_count);
        const std::string_view rview(rdata.data, rdata.size);
        // size_t index: same signedness as the row count it is compared with.
        for (size_t row = 0; row < row_count; ++row) {
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[row - 1]]);
            const size_t l_str_size = loffsets[row] - loffsets[row - 1];
            const std::string_view lview(l_raw_str, l_str_size);

            OP::execute(lview, rview, res[row]);
        }
        return Status::OK();
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
477
4
                                ResultPaddedPODArray& res) {
478
4
        auto size = loffsets.size();
479
4
        res.resize(size);
480
4
        std::string_view rview(rdata.data, rdata.size);
481
8
        for (int i = 0; i < size; ++i) {
482
4
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
483
4
            int l_str_size = loffsets[i] - loffsets[i - 1];
484
4
            std::string_view lview(l_raw_str, l_str_size);
485
486
4
            OP::execute(lview, rview, res[i]);
487
4
        }
488
4
        return Status::OK();
489
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
477
14
                                ResultPaddedPODArray& res) {
478
14
        auto size = loffsets.size();
479
14
        res.resize(size);
480
14
        std::string_view rview(rdata.data, rdata.size);
481
28
        for (int i = 0; i < size; ++i) {
482
14
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
483
14
            int l_str_size = loffsets[i] - loffsets[i - 1];
484
14
            std::string_view lview(l_raw_str, l_str_size);
485
486
14
            OP::execute(lview, rview, res[i]);
487
14
        }
488
14
        return Status::OK();
489
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
477
16
                                ResultPaddedPODArray& res) {
478
16
        auto size = loffsets.size();
479
16
        res.resize(size);
480
16
        std::string_view rview(rdata.data, rdata.size);
481
32
        for (int i = 0; i < size; ++i) {
482
16
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
483
16
            int l_str_size = loffsets[i] - loffsets[i - 1];
484
16
            std::string_view lview(l_raw_str, l_str_size);
485
486
16
            OP::execute(lview, rview, res[i]);
487
16
        }
488
16
        return Status::OK();
489
16
    }
490
    /// Applies OP::execute to one constant left operand paired with every row of the right
    /// column.
    ///
    /// @param ldata          the constant left operand
    /// @param rdata,roffsets bytes and end offsets of the right column
    /// @param res            resized to one entry per row; OP::execute writes each entry
    /// @return always Status::OK()
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
        const size_t row_count = roffsets.size();
        res.resize(row_count);
        const std::string_view lview(ldata.data, ldata.size);
        // size_t index: same signedness as the row count it is compared with.
        for (size_t row = 0; row < row_count; ++row) {
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[row - 1]]);
            const size_t r_str_size = roffsets[row] - roffsets[row - 1];
            const std::string_view rview(r_raw_str, r_str_size);

            OP::execute(lview, rview, res[row]);
        }
        return Status::OK();
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
491
4
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
492
4
        auto size = roffsets.size();
493
4
        res.resize(size);
494
4
        std::string_view lview(ldata.data, ldata.size);
495
8
        for (int i = 0; i < size; ++i) {
496
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
497
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
498
4
            std::string_view rview(r_raw_str, r_str_size);
499
500
4
            OP::execute(lview, rview, res[i]);
501
4
        }
502
4
        return Status::OK();
503
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
491
14
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
492
14
        auto size = roffsets.size();
493
14
        res.resize(size);
494
14
        std::string_view lview(ldata.data, ldata.size);
495
28
        for (int i = 0; i < size; ++i) {
496
14
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
497
14
            int r_str_size = roffsets[i] - roffsets[i - 1];
498
14
            std::string_view rview(r_raw_str, r_str_size);
499
500
14
            OP::execute(lview, rview, res[i]);
501
14
        }
502
14
        return Status::OK();
503
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERNS7_IiLm4096ESA_Lm16ELm15EEE
Line
Count
Source
491
26
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
492
26
        auto size = roffsets.size();
493
26
        res.resize(size);
494
26
        std::string_view lview(ldata.data, ldata.size);
495
58
        for (int i = 0; i < size; ++i) {
496
32
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
497
32
            int r_str_size = roffsets[i] - roffsets[i - 1];
498
32
            std::string_view rview(r_raw_str, r_str_size);
499
500
32
            OP::execute(lview, rview, res[i]);
501
32
        }
502
26
        return Status::OK();
503
26
    }
504
};
505
506
/// Name tag: exposes the function name "lower" as a compile-time constant.
struct NameToLower {
    static constexpr const char* name = "lower";
};
509
510
/// Name tag: exposes the function name "upper" as a compile-time constant.
struct NameToUpper {
    static constexpr const char* name = "upper";
};
513
514
template <typename OpName>
515
struct TransferImpl {
516
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
517
329
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
518
329
        size_t offset_size = offsets.size();
519
329
        if (UNLIKELY(!offset_size)) {
520
0
            return Status::OK();
521
0
        }
522
523
329
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
524
329
        res_offsets.resize(offset_size);
525
329
        if (is_ascii) {
526
269
            memcpy_small_allow_read_write_overflow15(
527
269
                    res_offsets.data(), offsets.data(),
528
269
                    offset_size * sizeof(ColumnString::Offsets::value_type));
529
530
269
            size_t data_length = data.size();
531
269
            res_data.resize(data_length);
532
269
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
533
85
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
534
184
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
535
184
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
536
184
            }
537
269
        } else {
538
60
            execute_utf8(data, offsets, res_data, res_offsets);
539
60
        }
540
541
329
        return Status::OK();
542
329
    }
_ZN5doris12TransferImplINS_11NameToLowerEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
517
205
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
518
205
        size_t offset_size = offsets.size();
519
205
        if (UNLIKELY(!offset_size)) {
520
0
            return Status::OK();
521
0
        }
522
523
205
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
524
205
        res_offsets.resize(offset_size);
525
205
        if (is_ascii) {
526
184
            memcpy_small_allow_read_write_overflow15(
527
184
                    res_offsets.data(), offsets.data(),
528
184
                    offset_size * sizeof(ColumnString::Offsets::value_type));
529
530
184
            size_t data_length = data.size();
531
184
            res_data.resize(data_length);
532
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
533
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
534
184
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
535
184
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
536
184
            }
537
184
        } else {
538
21
            execute_utf8(data, offsets, res_data, res_offsets);
539
21
        }
540
541
205
        return Status::OK();
542
205
    }
_ZN5doris12TransferImplINS_11NameToUpperEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
517
124
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
518
124
        size_t offset_size = offsets.size();
519
124
        if (UNLIKELY(!offset_size)) {
520
0
            return Status::OK();
521
0
        }
522
523
124
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
524
124
        res_offsets.resize(offset_size);
525
124
        if (is_ascii) {
526
85
            memcpy_small_allow_read_write_overflow15(
527
85
                    res_offsets.data(), offsets.data(),
528
85
                    offset_size * sizeof(ColumnString::Offsets::value_type));
529
530
85
            size_t data_length = data.size();
531
85
            res_data.resize(data_length);
532
85
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
533
85
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
534
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
535
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
536
            }
537
85
        } else {
538
39
            execute_utf8(data, offsets, res_data, res_offsets);
539
39
        }
540
541
124
        return Status::OK();
542
124
    }
543
544
    static void execute_utf8(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
545
60
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
546
60
        std::string result;
547
198
        for (int64_t i = 0; i < offsets.size(); ++i) {
548
138
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
549
138
            uint32_t size = offsets[i] - offsets[i - 1];
550
551
138
            result.clear();
552
138
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
553
91
                to_upper_utf8(begin, size, result);
554
91
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
555
47
                to_lower_utf8(begin, size, result);
556
47
            }
557
138
            StringOP::push_value_string(result, i, res_data, res_offsets);
558
138
        }
559
60
    }
_ZN5doris12TransferImplINS_11NameToLowerEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
545
21
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
546
21
        std::string result;
547
68
        for (int64_t i = 0; i < offsets.size(); ++i) {
548
47
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
549
47
            uint32_t size = offsets[i] - offsets[i - 1];
550
551
47
            result.clear();
552
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
553
                to_upper_utf8(begin, size, result);
554
47
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
555
47
                to_lower_utf8(begin, size, result);
556
47
            }
557
47
            StringOP::push_value_string(result, i, res_data, res_offsets);
558
47
        }
559
21
    }
_ZN5doris12TransferImplINS_11NameToUpperEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
545
39
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
546
39
        std::string result;
547
130
        for (int64_t i = 0; i < offsets.size(); ++i) {
548
91
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
549
91
            uint32_t size = offsets[i] - offsets[i - 1];
550
551
91
            result.clear();
552
91
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
553
91
                to_upper_utf8(begin, size, result);
554
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
555
                to_lower_utf8(begin, size, result);
556
            }
557
91
            StringOP::push_value_string(result, i, res_data, res_offsets);
558
91
        }
559
39
    }
560
561
91
    static void to_upper_utf8(const char* data, uint32_t size, std::string& result) {
562
91
        icu::StringPiece sp;
563
91
        sp.set(data, size);
564
91
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
565
91
        unicode_str.toUpper();
566
91
        unicode_str.toUTF8String(result);
567
91
    }
568
569
47
    static void to_lower_utf8(const char* data, uint32_t size, std::string& result) {
570
47
        icu::StringPiece sp;
571
47
        sp.set(data, size);
572
47
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
573
47
        unicode_str.toLower();
574
47
        unicode_str.toUTF8String(result);
575
47
    }
576
};
577
578
// Capitalize first letter
579
struct NameToInitcap {
580
    static constexpr auto name = "initcap";
581
};
582
583
struct InitcapImpl {
584
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
585
172
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
586
172
        res_offsets.resize(offsets.size());
587
588
172
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
589
172
        if (is_ascii) {
590
114
            impl_vectors_ascii(data, offsets, res_data, res_offsets);
591
114
        } else {
592
58
            impl_vectors_utf8(data, offsets, res_data, res_offsets);
593
58
        }
594
172
        return Status::OK();
595
172
    }
596
597
    static void impl_vectors_ascii(const ColumnString::Chars& data,
598
                                   const ColumnString::Offsets& offsets,
599
                                   ColumnString::Chars& res_data,
600
114
                                   ColumnString::Offsets& res_offsets) {
601
114
        size_t offset_size = offsets.size();
602
114
        memcpy_small_allow_read_write_overflow15(
603
114
                res_offsets.data(), offsets.data(),
604
114
                offset_size * sizeof(ColumnString::Offsets::value_type));
605
606
114
        size_t data_length = data.size();
607
114
        res_data.resize(data_length);
608
114
        simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
609
610
114
        bool need_capitalize = true;
611
246
        for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) {
612
132
            auto end_index = res_offsets[offset_index];
613
132
            need_capitalize = true;
614
615
1.56k
            for (size_t i = start_index; i < end_index; ++i) {
616
1.43k
                if (!::isalnum(res_data[i])) {
617
216
                    need_capitalize = true;
618
1.21k
                } else if (need_capitalize) {
619
                    /*
620
                    https://en.cppreference.com/w/cpp/string/byte/toupper
621
                    Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. 
622
                    To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char:
623
                    char my_toupper(char ch)
624
                    {
625
                        return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
626
                    }
627
                    */
628
267
                    res_data[i] = static_cast<unsigned char>(::toupper(res_data[i]));
629
267
                    need_capitalize = false;
630
267
                }
631
1.43k
            }
632
633
132
            start_index = end_index;
634
132
        }
635
114
    }
636
637
    static void impl_vectors_utf8(const ColumnString::Chars& data,
638
                                  const ColumnString::Offsets& offsets,
639
                                  ColumnString::Chars& res_data,
640
58
                                  ColumnString::Offsets& res_offsets) {
641
58
        std::string result;
642
123
        for (int64_t i = 0; i < offsets.size(); ++i) {
643
65
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
644
65
            uint32_t size = offsets[i] - offsets[i - 1];
645
65
            result.clear();
646
65
            to_initcap_utf8(begin, size, result);
647
65
            StringOP::push_value_string(result, i, res_data, res_offsets);
648
65
        }
649
58
    }
650
651
65
    static void to_initcap_utf8(const char* data, uint32_t size, std::string& result) {
652
65
        icu::StringPiece sp;
653
65
        sp.set(data, size);
654
65
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
655
65
        unicode_str.toLower();
656
65
        icu::UnicodeString output_str;
657
65
        bool need_capitalize = true;
658
65
        icu::StringCharacterIterator iter(unicode_str);
659
647
        for (UChar32 ch = iter.first32(); ch != icu::CharacterIterator::DONE; ch = iter.next32()) {
660
582
            if (!u_isalnum(ch)) {
661
105
                need_capitalize = true;
662
477
            } else if (need_capitalize) {
663
87
                ch = u_toupper(ch);
664
87
                need_capitalize = false;
665
87
            }
666
582
            output_str.append(ch);
667
582
        }
668
65
        output_str.toUTF8String(result);
669
65
    }
670
};
671
672
struct NameTrim {
673
    static constexpr auto name = "trim";
674
};
675
struct NameLTrim {
676
    static constexpr auto name = "ltrim";
677
};
678
struct NameRTrim {
679
    static constexpr auto name = "rtrim";
680
};
681
struct NameTrimIn {
682
    static constexpr auto name = "trim_in";
683
};
684
struct NameLTrimIn {
685
    static constexpr auto name = "ltrim_in";
686
};
687
struct NameRTrimIn {
688
    static constexpr auto name = "rtrim_in";
689
};
690
template <bool is_ltrim, bool is_rtrim, bool trim_single>
691
struct TrimUtil {
692
    static Status vector(const ColumnString::Chars& str_data,
693
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
694
300
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
695
300
        const size_t offset_size = str_offsets.size();
696
300
        res_offsets.resize(offset_size);
697
300
        res_data.reserve(str_data.size());
698
852
        for (size_t i = 0; i < offset_size; ++i) {
699
552
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
700
552
            const auto* str_end = str_data.data() + str_offsets[i];
701
702
552
            if constexpr (is_ltrim) {
703
335
                str_begin =
704
335
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
705
335
            }
706
552
            if constexpr (is_rtrim) {
707
395
                str_end =
708
395
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
709
395
            }
710
711
552
            res_data.insert_assume_reserved(str_begin, str_end);
712
            // The length of the result of the trim function will never exceed the length of the input.
713
552
            res_offsets[i] = (ColumnString::Offset)res_data.size();
714
552
        }
715
300
        return Status::OK();
716
300
    }
_ZN5doris8TrimUtilILb1ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
694
58
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
695
58
        const size_t offset_size = str_offsets.size();
696
58
        res_offsets.resize(offset_size);
697
58
        res_data.reserve(str_data.size());
698
178
        for (size_t i = 0; i < offset_size; ++i) {
699
120
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
700
120
            const auto* str_end = str_data.data() + str_offsets[i];
701
702
120
            if constexpr (is_ltrim) {
703
120
                str_begin =
704
120
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
705
120
            }
706
120
            if constexpr (is_rtrim) {
707
120
                str_end =
708
120
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
709
120
            }
710
711
120
            res_data.insert_assume_reserved(str_begin, str_end);
712
            // The length of the result of the trim function will never exceed the length of the input.
713
120
            res_offsets[i] = (ColumnString::Offset)res_data.size();
714
120
        }
715
58
        return Status::OK();
716
58
    }
_ZN5doris8TrimUtilILb1ELb0ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
694
52
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
695
52
        const size_t offset_size = str_offsets.size();
696
52
        res_offsets.resize(offset_size);
697
52
        res_data.reserve(str_data.size());
698
148
        for (size_t i = 0; i < offset_size; ++i) {
699
96
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
700
96
            const auto* str_end = str_data.data() + str_offsets[i];
701
702
96
            if constexpr (is_ltrim) {
703
96
                str_begin =
704
96
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
705
96
            }
706
            if constexpr (is_rtrim) {
707
                str_end =
708
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
709
            }
710
711
96
            res_data.insert_assume_reserved(str_begin, str_end);
712
            // The length of the result of the trim function will never exceed the length of the input.
713
96
            res_offsets[i] = (ColumnString::Offset)res_data.size();
714
96
        }
715
52
        return Status::OK();
716
52
    }
_ZN5doris8TrimUtilILb0ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
694
94
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
695
94
        const size_t offset_size = str_offsets.size();
696
94
        res_offsets.resize(offset_size);
697
94
        res_data.reserve(str_data.size());
698
266
        for (size_t i = 0; i < offset_size; ++i) {
699
172
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
700
172
            const auto* str_end = str_data.data() + str_offsets[i];
701
702
            if constexpr (is_ltrim) {
703
                str_begin =
704
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
705
            }
706
172
            if constexpr (is_rtrim) {
707
172
                str_end =
708
172
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
709
172
            }
710
711
172
            res_data.insert_assume_reserved(str_begin, str_end);
712
            // The length of the result of the trim function will never exceed the length of the input.
713
172
            res_offsets[i] = (ColumnString::Offset)res_data.size();
714
172
        }
715
94
        return Status::OK();
716
94
    }
_ZN5doris8TrimUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
694
24
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
695
24
        const size_t offset_size = str_offsets.size();
696
24
        res_offsets.resize(offset_size);
697
24
        res_data.reserve(str_data.size());
698
82
        for (size_t i = 0; i < offset_size; ++i) {
699
58
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
700
58
            const auto* str_end = str_data.data() + str_offsets[i];
701
702
58
            if constexpr (is_ltrim) {
703
58
                str_begin =
704
58
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
705
58
            }
706
58
            if constexpr (is_rtrim) {
707
58
                str_end =
708
58
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
709
58
            }
710
711
58
            res_data.insert_assume_reserved(str_begin, str_end);
712
            // The length of the result of the trim function will never exceed the length of the input.
713
58
            res_offsets[i] = (ColumnString::Offset)res_data.size();
714
58
        }
715
24
        return Status::OK();
716
24
    }
_ZN5doris8TrimUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
694
27
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
695
27
        const size_t offset_size = str_offsets.size();
696
27
        res_offsets.resize(offset_size);
697
27
        res_data.reserve(str_data.size());
698
88
        for (size_t i = 0; i < offset_size; ++i) {
699
61
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
700
61
            const auto* str_end = str_data.data() + str_offsets[i];
701
702
61
            if constexpr (is_ltrim) {
703
61
                str_begin =
704
61
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
705
61
            }
706
            if constexpr (is_rtrim) {
707
                str_end =
708
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
709
            }
710
711
61
            res_data.insert_assume_reserved(str_begin, str_end);
712
            // The length of the result of the trim function will never exceed the length of the input.
713
61
            res_offsets[i] = (ColumnString::Offset)res_data.size();
714
61
        }
715
27
        return Status::OK();
716
27
    }
_ZN5doris8TrimUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
694
45
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
695
45
        const size_t offset_size = str_offsets.size();
696
45
        res_offsets.resize(offset_size);
697
45
        res_data.reserve(str_data.size());
698
90
        for (size_t i = 0; i < offset_size; ++i) {
699
45
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
700
45
            const auto* str_end = str_data.data() + str_offsets[i];
701
702
            if constexpr (is_ltrim) {
703
                str_begin =
704
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
705
            }
706
45
            if constexpr (is_rtrim) {
707
45
                str_end =
708
45
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
709
45
            }
710
711
45
            res_data.insert_assume_reserved(str_begin, str_end);
712
            // The length of the result of the trim function will never exceed the length of the input.
713
45
            res_offsets[i] = (ColumnString::Offset)res_data.size();
714
45
        }
715
45
        return Status::OK();
716
45
    }
717
};
718
template <bool is_ltrim, bool is_rtrim, bool trim_single>
719
struct TrimInUtil {
720
    static Status vector(const ColumnString::Chars& str_data,
721
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
722
121
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
723
121
        const size_t offset_size = str_offsets.size();
724
121
        res_offsets.resize(offset_size);
725
121
        res_data.reserve(str_data.size());
726
121
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
727
121
                         simd::VStringFunctions::is_ascii(StringRef(
728
76
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
729
730
121
        if (all_ascii) {
731
68
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
732
68
        } else {
733
53
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
734
53
        }
735
121
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
722
43
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
723
43
        const size_t offset_size = str_offsets.size();
724
43
        res_offsets.resize(offset_size);
725
43
        res_data.reserve(str_data.size());
726
43
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
727
43
                         simd::VStringFunctions::is_ascii(StringRef(
728
28
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
729
730
43
        if (all_ascii) {
731
24
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
732
24
        } else {
733
19
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
734
19
        }
735
43
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
722
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
723
36
        const size_t offset_size = str_offsets.size();
724
36
        res_offsets.resize(offset_size);
725
36
        res_data.reserve(str_data.size());
726
36
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
727
36
                         simd::VStringFunctions::is_ascii(StringRef(
728
21
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
729
730
36
        if (all_ascii) {
731
19
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
732
19
        } else {
733
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
734
17
        }
735
36
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
722
42
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
723
42
        const size_t offset_size = str_offsets.size();
724
42
        res_offsets.resize(offset_size);
725
42
        res_data.reserve(str_data.size());
726
42
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
727
42
                         simd::VStringFunctions::is_ascii(StringRef(
728
27
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
729
730
42
        if (all_ascii) {
731
25
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
732
25
        } else {
733
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
734
17
        }
735
42
    }
736
737
private:
738
    static Status impl_vectors_ascii(const ColumnString::Chars& str_data,
739
                                     const ColumnString::Offsets& str_offsets,
740
                                     const StringRef& remove_str, ColumnString::Chars& res_data,
741
68
                                     ColumnString::Offsets& res_offsets) {
742
68
        const size_t offset_size = str_offsets.size();
743
68
        std::bitset<128> char_lookup;
744
68
        const char* remove_begin = remove_str.data;
745
68
        const char* remove_end = remove_str.data + remove_str.size;
746
747
251
        while (remove_begin < remove_end) {
748
183
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
749
183
            remove_begin += 1;
750
183
        }
751
752
136
        for (size_t i = 0; i < offset_size; ++i) {
753
68
            const char* str_begin =
754
68
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
755
68
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
756
68
            const char* left_trim_pos = str_begin;
757
68
            const char* right_trim_pos = str_end;
758
759
68
            if constexpr (is_ltrim) {
760
127
                while (left_trim_pos < str_end) {
761
114
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
762
30
                        break;
763
30
                    }
764
84
                    ++left_trim_pos;
765
84
                }
766
43
            }
767
768
68
            if constexpr (is_rtrim) {
769
114
                while (right_trim_pos > left_trim_pos) {
770
100
                    --right_trim_pos;
771
100
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
772
35
                        ++right_trim_pos;
773
35
                        break;
774
35
                    }
775
100
                }
776
49
            }
777
778
68
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
779
            // The length of the result of the trim function will never exceed the length of the input.
780
68
            res_offsets[i] = (ColumnString::Offset)res_data.size();
781
68
        }
782
783
68
        return Status::OK();
784
68
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
741
24
                                     ColumnString::Offsets& res_offsets) {
742
24
        const size_t offset_size = str_offsets.size();
743
24
        std::bitset<128> char_lookup;
744
24
        const char* remove_begin = remove_str.data;
745
24
        const char* remove_end = remove_str.data + remove_str.size;
746
747
86
        while (remove_begin < remove_end) {
748
62
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
749
62
            remove_begin += 1;
750
62
        }
751
752
48
        for (size_t i = 0; i < offset_size; ++i) {
753
24
            const char* str_begin =
754
24
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
755
24
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
756
24
            const char* left_trim_pos = str_begin;
757
24
            const char* right_trim_pos = str_end;
758
759
24
            if constexpr (is_ltrim) {
760
57
                while (left_trim_pos < str_end) {
761
50
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
762
17
                        break;
763
17
                    }
764
33
                    ++left_trim_pos;
765
33
                }
766
24
            }
767
768
24
            if constexpr (is_rtrim) {
769
39
                while (right_trim_pos > left_trim_pos) {
770
32
                    --right_trim_pos;
771
32
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
772
17
                        ++right_trim_pos;
773
17
                        break;
774
17
                    }
775
32
                }
776
24
            }
777
778
24
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
779
            // The length of the result of the trim function will never exceed the length of the input.
780
24
            res_offsets[i] = (ColumnString::Offset)res_data.size();
781
24
        }
782
783
24
        return Status::OK();
784
24
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
741
19
                                     ColumnString::Offsets& res_offsets) {
742
19
        const size_t offset_size = str_offsets.size();
743
19
        std::bitset<128> char_lookup;
744
19
        const char* remove_begin = remove_str.data;
745
19
        const char* remove_end = remove_str.data + remove_str.size;
746
747
73
        while (remove_begin < remove_end) {
748
54
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
749
54
            remove_begin += 1;
750
54
        }
751
752
38
        for (size_t i = 0; i < offset_size; ++i) {
753
19
            const char* str_begin =
754
19
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
755
19
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
756
19
            const char* left_trim_pos = str_begin;
757
19
            const char* right_trim_pos = str_end;
758
759
19
            if constexpr (is_ltrim) {
760
70
                while (left_trim_pos < str_end) {
761
64
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
762
13
                        break;
763
13
                    }
764
51
                    ++left_trim_pos;
765
51
                }
766
19
            }
767
768
            if constexpr (is_rtrim) {
769
                while (right_trim_pos > left_trim_pos) {
770
                    --right_trim_pos;
771
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
772
                        ++right_trim_pos;
773
                        break;
774
                    }
775
                }
776
            }
777
778
19
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
779
            // The length of the result of the trim function will never exceed the length of the input.
780
19
            res_offsets[i] = (ColumnString::Offset)res_data.size();
781
19
        }
782
783
19
        return Status::OK();
784
19
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
741
25
                                     ColumnString::Offsets& res_offsets) {
742
25
        const size_t offset_size = str_offsets.size();
743
25
        std::bitset<128> char_lookup;
744
25
        const char* remove_begin = remove_str.data;
745
25
        const char* remove_end = remove_str.data + remove_str.size;
746
747
92
        while (remove_begin < remove_end) {
748
67
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
749
67
            remove_begin += 1;
750
67
        }
751
752
50
        for (size_t i = 0; i < offset_size; ++i) {
753
25
            const char* str_begin =
754
25
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
755
25
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
756
25
            const char* left_trim_pos = str_begin;
757
25
            const char* right_trim_pos = str_end;
758
759
            if constexpr (is_ltrim) {
760
                while (left_trim_pos < str_end) {
761
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
762
                        break;
763
                    }
764
                    ++left_trim_pos;
765
                }
766
            }
767
768
25
            if constexpr (is_rtrim) {
769
75
                while (right_trim_pos > left_trim_pos) {
770
68
                    --right_trim_pos;
771
68
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
772
18
                        ++right_trim_pos;
773
18
                        break;
774
18
                    }
775
68
                }
776
25
            }
777
778
25
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
779
            // The length of the result of the trim function will never exceed the length of the input.
780
25
            res_offsets[i] = (ColumnString::Offset)res_data.size();
781
25
        }
782
783
25
        return Status::OK();
784
25
    }
785
786
    static Status impl_vectors_utf8(const ColumnString::Chars& str_data,
787
                                    const ColumnString::Offsets& str_offsets,
788
                                    const StringRef& remove_str, ColumnString::Chars& res_data,
789
53
                                    ColumnString::Offsets& res_offsets) {
790
53
        const size_t offset_size = str_offsets.size();
791
53
        res_offsets.resize(offset_size);
792
53
        res_data.reserve(str_data.size());
793
794
53
        std::unordered_set<std::string_view> char_lookup;
795
53
        const char* remove_begin = remove_str.data;
796
53
        const char* remove_end = remove_str.data + remove_str.size;
797
798
240
        while (remove_begin < remove_end) {
799
187
            size_t byte_len, char_len;
800
187
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
801
187
                    remove_begin, remove_end, 1);
802
187
            char_lookup.insert(std::string_view(remove_begin, byte_len));
803
187
            remove_begin += byte_len;
804
187
        }
805
806
140
        for (size_t i = 0; i < offset_size; ++i) {
807
87
            const char* str_begin =
808
87
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
809
87
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
810
87
            const char* left_trim_pos = str_begin;
811
87
            const char* right_trim_pos = str_end;
812
813
87
            if constexpr (is_ltrim) {
814
81
                while (left_trim_pos < str_end) {
815
73
                    size_t byte_len, char_len;
816
73
                    std::tie(byte_len, char_len) =
817
73
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
818
73
                                                                                   str_end, 1);
819
73
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
820
73
                        char_lookup.end()) {
821
52
                        break;
822
52
                    }
823
21
                    left_trim_pos += byte_len;
824
21
                }
825
60
            }
826
827
87
            if constexpr (is_rtrim) {
828
88
                while (right_trim_pos > left_trim_pos) {
829
80
                    const char* prev_char_pos = right_trim_pos;
830
156
                    do {
831
156
                        --prev_char_pos;
832
156
                    } while ((*prev_char_pos & 0xC0) == 0x80);
833
80
                    size_t byte_len = right_trim_pos - prev_char_pos;
834
80
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
835
80
                        char_lookup.end()) {
836
52
                        break;
837
52
                    }
838
28
                    right_trim_pos = prev_char_pos;
839
28
                }
840
60
            }
841
842
87
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
843
            // The length of the result of the trim function will never exceed the length of the input.
844
87
            res_offsets[i] = (ColumnString::Offset)res_data.size();
845
87
        }
846
53
        return Status::OK();
847
53
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
789
19
                                    ColumnString::Offsets& res_offsets) {
790
19
        const size_t offset_size = str_offsets.size();
791
19
        res_offsets.resize(offset_size);
792
19
        res_data.reserve(str_data.size());
793
794
19
        std::unordered_set<std::string_view> char_lookup;
795
19
        const char* remove_begin = remove_str.data;
796
19
        const char* remove_end = remove_str.data + remove_str.size;
797
798
84
        while (remove_begin < remove_end) {
799
65
            size_t byte_len, char_len;
800
65
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
801
65
                    remove_begin, remove_end, 1);
802
65
            char_lookup.insert(std::string_view(remove_begin, byte_len));
803
65
            remove_begin += byte_len;
804
65
        }
805
806
52
        for (size_t i = 0; i < offset_size; ++i) {
807
33
            const char* str_begin =
808
33
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
809
33
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
810
33
            const char* left_trim_pos = str_begin;
811
33
            const char* right_trim_pos = str_end;
812
813
33
            if constexpr (is_ltrim) {
814
45
                while (left_trim_pos < str_end) {
815
41
                    size_t byte_len, char_len;
816
41
                    std::tie(byte_len, char_len) =
817
41
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
818
41
                                                                                   str_end, 1);
819
41
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
820
41
                        char_lookup.end()) {
821
29
                        break;
822
29
                    }
823
12
                    left_trim_pos += byte_len;
824
12
                }
825
33
            }
826
827
33
            if constexpr (is_rtrim) {
828
48
                while (right_trim_pos > left_trim_pos) {
829
44
                    const char* prev_char_pos = right_trim_pos;
830
90
                    do {
831
90
                        --prev_char_pos;
832
90
                    } while ((*prev_char_pos & 0xC0) == 0x80);
833
44
                    size_t byte_len = right_trim_pos - prev_char_pos;
834
44
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
835
44
                        char_lookup.end()) {
836
29
                        break;
837
29
                    }
838
15
                    right_trim_pos = prev_char_pos;
839
15
                }
840
33
            }
841
842
33
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
843
            // The length of the result of the trim function will never exceed the length of the input.
844
33
            res_offsets[i] = (ColumnString::Offset)res_data.size();
845
33
        }
846
19
        return Status::OK();
847
19
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
789
17
                                    ColumnString::Offsets& res_offsets) {
790
17
        const size_t offset_size = str_offsets.size();
791
17
        res_offsets.resize(offset_size);
792
17
        res_data.reserve(str_data.size());
793
794
17
        std::unordered_set<std::string_view> char_lookup;
795
17
        const char* remove_begin = remove_str.data;
796
17
        const char* remove_end = remove_str.data + remove_str.size;
797
798
78
        while (remove_begin < remove_end) {
799
61
            size_t byte_len, char_len;
800
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
801
61
                    remove_begin, remove_end, 1);
802
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
803
61
            remove_begin += byte_len;
804
61
        }
805
806
44
        for (size_t i = 0; i < offset_size; ++i) {
807
27
            const char* str_begin =
808
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
809
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
810
27
            const char* left_trim_pos = str_begin;
811
27
            const char* right_trim_pos = str_end;
812
813
27
            if constexpr (is_ltrim) {
814
36
                while (left_trim_pos < str_end) {
815
32
                    size_t byte_len, char_len;
816
32
                    std::tie(byte_len, char_len) =
817
32
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
818
32
                                                                                   str_end, 1);
819
32
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
820
32
                        char_lookup.end()) {
821
23
                        break;
822
23
                    }
823
9
                    left_trim_pos += byte_len;
824
9
                }
825
27
            }
826
827
            if constexpr (is_rtrim) {
828
                while (right_trim_pos > left_trim_pos) {
829
                    const char* prev_char_pos = right_trim_pos;
830
                    do {
831
                        --prev_char_pos;
832
                    } while ((*prev_char_pos & 0xC0) == 0x80);
833
                    size_t byte_len = right_trim_pos - prev_char_pos;
834
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
835
                        char_lookup.end()) {
836
                        break;
837
                    }
838
                    right_trim_pos = prev_char_pos;
839
                }
840
            }
841
842
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
843
            // The length of the result of the trim function will never exceed the length of the input.
844
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
845
27
        }
846
17
        return Status::OK();
847
17
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
789
17
                                    ColumnString::Offsets& res_offsets) {
790
17
        const size_t offset_size = str_offsets.size();
791
17
        res_offsets.resize(offset_size);
792
17
        res_data.reserve(str_data.size());
793
794
17
        std::unordered_set<std::string_view> char_lookup;
795
17
        const char* remove_begin = remove_str.data;
796
17
        const char* remove_end = remove_str.data + remove_str.size;
797
798
78
        while (remove_begin < remove_end) {
799
61
            size_t byte_len, char_len;
800
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
801
61
                    remove_begin, remove_end, 1);
802
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
803
61
            remove_begin += byte_len;
804
61
        }
805
806
44
        for (size_t i = 0; i < offset_size; ++i) {
807
27
            const char* str_begin =
808
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
809
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
810
27
            const char* left_trim_pos = str_begin;
811
27
            const char* right_trim_pos = str_end;
812
813
            if constexpr (is_ltrim) {
814
                while (left_trim_pos < str_end) {
815
                    size_t byte_len, char_len;
816
                    std::tie(byte_len, char_len) =
817
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
818
                                                                                   str_end, 1);
819
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
820
                        char_lookup.end()) {
821
                        break;
822
                    }
823
                    left_trim_pos += byte_len;
824
                }
825
            }
826
827
27
            if constexpr (is_rtrim) {
828
40
                while (right_trim_pos > left_trim_pos) {
829
36
                    const char* prev_char_pos = right_trim_pos;
830
66
                    do {
831
66
                        --prev_char_pos;
832
66
                    } while ((*prev_char_pos & 0xC0) == 0x80);
833
36
                    size_t byte_len = right_trim_pos - prev_char_pos;
834
36
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
835
36
                        char_lookup.end()) {
836
23
                        break;
837
23
                    }
838
13
                    right_trim_pos = prev_char_pos;
839
13
                }
840
27
            }
841
842
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
843
            // The length of the result of the trim function will never exceed the length of the input.
844
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
845
27
        }
846
17
        return Status::OK();
847
17
    }
848
};
849
// This is an implementation of a parameter for the Trim function.
850
template <bool is_ltrim, bool is_rtrim, typename Name>
851
struct Trim1Impl {
852
    static constexpr auto name = Name::name;
853
854
145
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
854
43
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
854
33
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
854
39
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
854
7
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
854
11
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
854
12
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
855
856
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
857
139
                          uint32_t result, size_t input_rows_count) {
858
139
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
859
139
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
860
139
            auto col_res = ColumnString::create();
861
139
            char blank[] = " ";
862
139
            const StringRef remove_str(blank, 1);
863
139
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
864
139
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
865
139
                    col_res->get_offsets())));
866
139
            block.replace_by_position(result, std::move(col_res));
867
139
        } else {
868
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
869
0
                                        block.get_by_position(arguments[0]).column->get_name(),
870
0
                                        name);
871
0
        }
872
139
        return Status::OK();
873
139
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
857
48
                          uint32_t result, size_t input_rows_count) {
858
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
859
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
860
48
            auto col_res = ColumnString::create();
861
48
            char blank[] = " ";
862
48
            const StringRef remove_str(blank, 1);
863
48
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
864
48
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
865
48
                    col_res->get_offsets())));
866
48
            block.replace_by_position(result, std::move(col_res));
867
48
        } else {
868
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
869
0
                                        block.get_by_position(arguments[0]).column->get_name(),
870
0
                                        name);
871
0
        }
872
48
        return Status::OK();
873
48
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
857
37
                          uint32_t result, size_t input_rows_count) {
858
37
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
859
37
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
860
37
            auto col_res = ColumnString::create();
861
37
            char blank[] = " ";
862
37
            const StringRef remove_str(blank, 1);
863
37
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
864
37
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
865
37
                    col_res->get_offsets())));
866
37
            block.replace_by_position(result, std::move(col_res));
867
37
        } else {
868
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
869
0
                                        block.get_by_position(arguments[0]).column->get_name(),
870
0
                                        name);
871
0
        }
872
37
        return Status::OK();
873
37
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
857
42
                          uint32_t result, size_t input_rows_count) {
858
42
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
859
42
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
860
42
            auto col_res = ColumnString::create();
861
42
            char blank[] = " ";
862
42
            const StringRef remove_str(blank, 1);
863
42
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
864
42
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
865
42
                    col_res->get_offsets())));
866
42
            block.replace_by_position(result, std::move(col_res));
867
42
        } else {
868
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
869
0
                                        block.get_by_position(arguments[0]).column->get_name(),
870
0
                                        name);
871
0
        }
872
42
        return Status::OK();
873
42
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
857
1
                          uint32_t result, size_t input_rows_count) {
858
1
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
859
1
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
860
1
            auto col_res = ColumnString::create();
861
1
            char blank[] = " ";
862
1
            const StringRef remove_str(blank, 1);
863
1
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
864
1
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
865
1
                    col_res->get_offsets())));
866
1
            block.replace_by_position(result, std::move(col_res));
867
1
        } else {
868
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
869
0
                                        block.get_by_position(arguments[0]).column->get_name(),
870
0
                                        name);
871
0
        }
872
1
        return Status::OK();
873
1
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
857
5
                          uint32_t result, size_t input_rows_count) {
858
5
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
859
5
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
860
5
            auto col_res = ColumnString::create();
861
5
            char blank[] = " ";
862
5
            const StringRef remove_str(blank, 1);
863
5
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
864
5
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
865
5
                    col_res->get_offsets())));
866
5
            block.replace_by_position(result, std::move(col_res));
867
5
        } else {
868
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
869
0
                                        block.get_by_position(arguments[0]).column->get_name(),
870
0
                                        name);
871
0
        }
872
5
        return Status::OK();
873
5
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
857
6
                          uint32_t result, size_t input_rows_count) {
858
6
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
859
6
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
860
6
            auto col_res = ColumnString::create();
861
6
            char blank[] = " ";
862
6
            const StringRef remove_str(blank, 1);
863
6
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
864
6
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
865
6
                    col_res->get_offsets())));
866
6
            block.replace_by_position(result, std::move(col_res));
867
6
        } else {
868
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
869
0
                                        block.get_by_position(arguments[0]).column->get_name(),
870
0
                                        name);
871
0
        }
872
6
        return Status::OK();
873
6
    }
874
};
875
876
// This is an implementation of two parameters for the Trim function.
877
template <bool is_ltrim, bool is_rtrim, typename Name>
878
struct Trim2Impl {
879
    static constexpr auto name = Name::name;
880
881
214
    static DataTypes get_variadic_argument_types() {
882
214
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
883
214
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
881
18
    static DataTypes get_variadic_argument_types() {
882
18
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
883
18
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
881
27
    static DataTypes get_variadic_argument_types() {
882
27
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
883
27
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
881
82
    static DataTypes get_variadic_argument_types() {
882
82
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
883
82
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
881
25
    static DataTypes get_variadic_argument_types() {
882
25
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
883
25
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
881
27
    static DataTypes get_variadic_argument_types() {
882
27
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
883
27
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
881
35
    static DataTypes get_variadic_argument_types() {
882
35
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
883
35
    }
884
885
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
886
282
                          uint32_t result, size_t input_rows_count) {
887
282
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
282
        const auto& rcol =
889
282
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
890
282
                        ->get_data_column_ptr();
891
282
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
892
282
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
893
282
                auto col_res = ColumnString::create();
894
282
                const auto* remove_str_raw = col_right->get_chars().data();
895
282
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
896
282
                const StringRef remove_str(remove_str_raw, remove_str_size);
897
898
282
                if (remove_str.size == 1) {
899
65
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
900
65
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
901
65
                            col_res->get_offsets())));
902
217
                } else {
903
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
904
                                  std::is_same<Name, NameLTrimIn>::value ||
905
121
                                  std::is_same<Name, NameRTrimIn>::value) {
906
121
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
907
121
                                col->get_chars(), col->get_offsets(), remove_str,
908
121
                                col_res->get_chars(), col_res->get_offsets())));
909
121
                    } else {
910
96
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
911
96
                                col->get_chars(), col->get_offsets(), remove_str,
912
96
                                col_res->get_chars(), col_res->get_offsets())));
913
96
                    }
914
217
                }
915
282
                block.replace_by_position(result, std::move(col_res));
916
282
            } else {
917
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
918
0
                                            block.get_by_position(arguments[1]).column->get_name(),
919
0
                                            name);
920
0
            }
921
922
282
        } else {
923
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
924
0
                                        block.get_by_position(arguments[0]).column->get_name(),
925
0
                                        name);
926
0
        }
927
282
        return Status::OK();
928
282
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
26
                          uint32_t result, size_t input_rows_count) {
887
26
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
26
        const auto& rcol =
889
26
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
890
26
                        ->get_data_column_ptr();
891
26
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
892
26
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
893
26
                auto col_res = ColumnString::create();
894
26
                const auto* remove_str_raw = col_right->get_chars().data();
895
26
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
896
26
                const StringRef remove_str(remove_str_raw, remove_str_size);
897
898
26
                if (remove_str.size == 1) {
899
2
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
900
2
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
901
2
                            col_res->get_offsets())));
902
24
                } else {
903
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
904
                                  std::is_same<Name, NameLTrimIn>::value ||
905
                                  std::is_same<Name, NameRTrimIn>::value) {
906
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
907
                                col->get_chars(), col->get_offsets(), remove_str,
908
                                col_res->get_chars(), col_res->get_offsets())));
909
24
                    } else {
910
24
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
911
24
                                col->get_chars(), col->get_offsets(), remove_str,
912
24
                                col_res->get_chars(), col_res->get_offsets())));
913
24
                    }
914
24
                }
915
26
                block.replace_by_position(result, std::move(col_res));
916
26
            } else {
917
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
918
0
                                            block.get_by_position(arguments[1]).column->get_name(),
919
0
                                            name);
920
0
            }
921
922
26
        } else {
923
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
924
0
                                        block.get_by_position(arguments[0]).column->get_name(),
925
0
                                        name);
926
0
        }
927
26
        return Status::OK();
928
26
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
32
                          uint32_t result, size_t input_rows_count) {
887
32
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
32
        const auto& rcol =
889
32
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
890
32
                        ->get_data_column_ptr();
891
32
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
892
32
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
893
32
                auto col_res = ColumnString::create();
894
32
                const auto* remove_str_raw = col_right->get_chars().data();
895
32
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
896
32
                const StringRef remove_str(remove_str_raw, remove_str_size);
897
898
32
                if (remove_str.size == 1) {
899
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
900
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
901
5
                            col_res->get_offsets())));
902
27
                } else {
903
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
904
                                  std::is_same<Name, NameLTrimIn>::value ||
905
                                  std::is_same<Name, NameRTrimIn>::value) {
906
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
907
                                col->get_chars(), col->get_offsets(), remove_str,
908
                                col_res->get_chars(), col_res->get_offsets())));
909
27
                    } else {
910
27
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
911
27
                                col->get_chars(), col->get_offsets(), remove_str,
912
27
                                col_res->get_chars(), col_res->get_offsets())));
913
27
                    }
914
27
                }
915
32
                block.replace_by_position(result, std::move(col_res));
916
32
            } else {
917
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
918
0
                                            block.get_by_position(arguments[1]).column->get_name(),
919
0
                                            name);
920
0
            }
921
922
32
        } else {
923
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
924
0
                                        block.get_by_position(arguments[0]).column->get_name(),
925
0
                                        name);
926
0
        }
927
32
        return Status::OK();
928
32
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
85
                          uint32_t result, size_t input_rows_count) {
887
85
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
85
        const auto& rcol =
889
85
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
890
85
                        ->get_data_column_ptr();
891
85
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
892
85
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
893
85
                auto col_res = ColumnString::create();
894
85
                const auto* remove_str_raw = col_right->get_chars().data();
895
85
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
896
85
                const StringRef remove_str(remove_str_raw, remove_str_size);
897
898
85
                if (remove_str.size == 1) {
899
40
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
900
40
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
901
40
                            col_res->get_offsets())));
902
45
                } else {
903
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
904
                                  std::is_same<Name, NameLTrimIn>::value ||
905
                                  std::is_same<Name, NameRTrimIn>::value) {
906
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
907
                                col->get_chars(), col->get_offsets(), remove_str,
908
                                col_res->get_chars(), col_res->get_offsets())));
909
45
                    } else {
910
45
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
911
45
                                col->get_chars(), col->get_offsets(), remove_str,
912
45
                                col_res->get_chars(), col_res->get_offsets())));
913
45
                    }
914
45
                }
915
85
                block.replace_by_position(result, std::move(col_res));
916
85
            } else {
917
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
918
0
                                            block.get_by_position(arguments[1]).column->get_name(),
919
0
                                            name);
920
0
            }
921
922
85
        } else {
923
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
924
0
                                        block.get_by_position(arguments[0]).column->get_name(),
925
0
                                        name);
926
0
        }
927
85
        return Status::OK();
928
85
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
50
                          uint32_t result, size_t input_rows_count) {
887
50
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
50
        const auto& rcol =
889
50
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
890
50
                        ->get_data_column_ptr();
891
50
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
892
50
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
893
50
                auto col_res = ColumnString::create();
894
50
                const auto* remove_str_raw = col_right->get_chars().data();
895
50
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
896
50
                const StringRef remove_str(remove_str_raw, remove_str_size);
897
898
50
                if (remove_str.size == 1) {
899
7
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
900
7
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
901
7
                            col_res->get_offsets())));
902
43
                } else {
903
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
904
                                  std::is_same<Name, NameLTrimIn>::value ||
905
43
                                  std::is_same<Name, NameRTrimIn>::value) {
906
43
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
907
43
                                col->get_chars(), col->get_offsets(), remove_str,
908
43
                                col_res->get_chars(), col_res->get_offsets())));
909
                    } else {
910
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
911
                                col->get_chars(), col->get_offsets(), remove_str,
912
                                col_res->get_chars(), col_res->get_offsets())));
913
                    }
914
43
                }
915
50
                block.replace_by_position(result, std::move(col_res));
916
50
            } else {
917
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
918
0
                                            block.get_by_position(arguments[1]).column->get_name(),
919
0
                                            name);
920
0
            }
921
922
50
        } else {
923
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
924
0
                                        block.get_by_position(arguments[0]).column->get_name(),
925
0
                                        name);
926
0
        }
927
50
        return Status::OK();
928
50
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
41
                          uint32_t result, size_t input_rows_count) {
887
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
41
        const auto& rcol =
889
41
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
890
41
                        ->get_data_column_ptr();
891
41
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
892
41
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
893
41
                auto col_res = ColumnString::create();
894
41
                const auto* remove_str_raw = col_right->get_chars().data();
895
41
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
896
41
                const StringRef remove_str(remove_str_raw, remove_str_size);
897
898
41
                if (remove_str.size == 1) {
899
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
900
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
901
5
                            col_res->get_offsets())));
902
36
                } else {
903
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
904
                                  std::is_same<Name, NameLTrimIn>::value ||
905
36
                                  std::is_same<Name, NameRTrimIn>::value) {
906
36
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
907
36
                                col->get_chars(), col->get_offsets(), remove_str,
908
36
                                col_res->get_chars(), col_res->get_offsets())));
909
                    } else {
910
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
911
                                col->get_chars(), col->get_offsets(), remove_str,
912
                                col_res->get_chars(), col_res->get_offsets())));
913
                    }
914
36
                }
915
41
                block.replace_by_position(result, std::move(col_res));
916
41
            } else {
917
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
918
0
                                            block.get_by_position(arguments[1]).column->get_name(),
919
0
                                            name);
920
0
            }
921
922
41
        } else {
923
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
924
0
                                        block.get_by_position(arguments[0]).column->get_name(),
925
0
                                        name);
926
0
        }
927
41
        return Status::OK();
928
41
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
886
48
                          uint32_t result, size_t input_rows_count) {
887
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
888
48
        const auto& rcol =
889
48
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
890
48
                        ->get_data_column_ptr();
891
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
892
48
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
893
48
                auto col_res = ColumnString::create();
894
48
                const auto* remove_str_raw = col_right->get_chars().data();
895
48
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
896
48
                const StringRef remove_str(remove_str_raw, remove_str_size);
897
898
48
                if (remove_str.size == 1) {
899
6
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
900
6
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
901
6
                            col_res->get_offsets())));
902
42
                } else {
903
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
904
                                  std::is_same<Name, NameLTrimIn>::value ||
905
42
                                  std::is_same<Name, NameRTrimIn>::value) {
906
42
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
907
42
                                col->get_chars(), col->get_offsets(), remove_str,
908
42
                                col_res->get_chars(), col_res->get_offsets())));
909
                    } else {
910
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
911
                                col->get_chars(), col->get_offsets(), remove_str,
912
                                col_res->get_chars(), col_res->get_offsets())));
913
                    }
914
42
                }
915
48
                block.replace_by_position(result, std::move(col_res));
916
48
            } else {
917
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
918
0
                                            block.get_by_position(arguments[1]).column->get_name(),
919
0
                                            name);
920
0
            }
921
922
48
        } else {
923
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
924
0
                                        block.get_by_position(arguments[0]).column->get_name(),
925
0
                                        name);
926
0
        }
927
48
        return Status::OK();
928
48
    }
929
};
930
931
template <typename impl>
932
class FunctionTrim : public IFunction {
933
public:
934
    static constexpr auto name = impl::name;
935
371
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
935
44
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
935
34
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
935
40
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
935
19
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
935
28
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
935
83
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
935
8
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
935
12
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
935
13
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
935
26
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
935
28
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
935
36
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
936
12
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
936
1
    String get_name() const override { return impl::name; }
937
938
287
    size_t get_number_of_arguments() const override {
939
287
        return get_variadic_argument_types_impl().size();
940
287
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
938
37
    size_t get_number_of_arguments() const override {
939
37
        return get_variadic_argument_types_impl().size();
940
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
938
27
    size_t get_number_of_arguments() const override {
939
27
        return get_variadic_argument_types_impl().size();
940
27
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
938
33
    size_t get_number_of_arguments() const override {
939
33
        return get_variadic_argument_types_impl().size();
940
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
938
12
    size_t get_number_of_arguments() const override {
939
12
        return get_variadic_argument_types_impl().size();
940
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
938
21
    size_t get_number_of_arguments() const override {
939
21
        return get_variadic_argument_types_impl().size();
940
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
938
76
    size_t get_number_of_arguments() const override {
939
76
        return get_variadic_argument_types_impl().size();
940
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
938
1
    size_t get_number_of_arguments() const override {
939
1
        return get_variadic_argument_types_impl().size();
940
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
938
5
    size_t get_number_of_arguments() const override {
939
5
        return get_variadic_argument_types_impl().size();
940
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
938
6
    size_t get_number_of_arguments() const override {
939
6
        return get_variadic_argument_types_impl().size();
940
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
938
19
    size_t get_number_of_arguments() const override {
939
19
        return get_variadic_argument_types_impl().size();
940
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
938
21
    size_t get_number_of_arguments() const override {
939
21
        return get_variadic_argument_types_impl().size();
940
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
938
29
    size_t get_number_of_arguments() const override {
939
29
        return get_variadic_argument_types_impl().size();
940
29
    }
941
942
287
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
287
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
287
        return arguments[0];
949
287
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
37
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
37
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
37
        return arguments[0];
949
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
27
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
27
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
27
        return arguments[0];
949
27
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
33
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
33
        return arguments[0];
949
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
12
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
12
        return arguments[0];
949
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
21
        return arguments[0];
949
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
76
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
76
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
76
        return arguments[0];
949
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
1
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
1
        return arguments[0];
949
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
5
        return arguments[0];
949
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
6
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
6
        return arguments[0];
949
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
19
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
19
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
19
        return arguments[0];
949
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
21
        return arguments[0];
949
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
942
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
943
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
944
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
945
0
                                   "Illegal type {} of argument of function {}",
946
0
                                   arguments[0]->get_name(), get_name());
947
0
        }
948
29
        return arguments[0];
949
29
    }
950
    // The second parameter of "trim" is a constant.
951
570
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
85
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
58
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
64
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
37
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
41
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
96
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
1
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
5
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
6
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
67
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
51
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
951
59
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
952
953
359
    DataTypes get_variadic_argument_types_impl() const override {
954
359
        return impl::get_variadic_argument_types();
955
359
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
43
    DataTypes get_variadic_argument_types_impl() const override {
954
43
        return impl::get_variadic_argument_types();
955
43
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
33
    DataTypes get_variadic_argument_types_impl() const override {
954
33
        return impl::get_variadic_argument_types();
955
33
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
39
    DataTypes get_variadic_argument_types_impl() const override {
954
39
        return impl::get_variadic_argument_types();
955
39
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
18
    DataTypes get_variadic_argument_types_impl() const override {
954
18
        return impl::get_variadic_argument_types();
955
18
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
27
    DataTypes get_variadic_argument_types_impl() const override {
954
27
        return impl::get_variadic_argument_types();
955
27
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
82
    DataTypes get_variadic_argument_types_impl() const override {
954
82
        return impl::get_variadic_argument_types();
955
82
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
7
    DataTypes get_variadic_argument_types_impl() const override {
954
7
        return impl::get_variadic_argument_types();
955
7
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
11
    DataTypes get_variadic_argument_types_impl() const override {
954
11
        return impl::get_variadic_argument_types();
955
11
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
12
    DataTypes get_variadic_argument_types_impl() const override {
954
12
        return impl::get_variadic_argument_types();
955
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
25
    DataTypes get_variadic_argument_types_impl() const override {
954
25
        return impl::get_variadic_argument_types();
955
25
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
27
    DataTypes get_variadic_argument_types_impl() const override {
954
27
        return impl::get_variadic_argument_types();
955
27
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
953
35
    DataTypes get_variadic_argument_types_impl() const override {
954
35
        return impl::get_variadic_argument_types();
955
35
    }
956
957
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
958
421
                        uint32_t result, size_t input_rows_count) const override {
959
421
        return impl::execute(context, block, arguments, result, input_rows_count);
960
421
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
48
                        uint32_t result, size_t input_rows_count) const override {
959
48
        return impl::execute(context, block, arguments, result, input_rows_count);
960
48
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
37
                        uint32_t result, size_t input_rows_count) const override {
959
37
        return impl::execute(context, block, arguments, result, input_rows_count);
960
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
42
                        uint32_t result, size_t input_rows_count) const override {
959
42
        return impl::execute(context, block, arguments, result, input_rows_count);
960
42
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
26
                        uint32_t result, size_t input_rows_count) const override {
959
26
        return impl::execute(context, block, arguments, result, input_rows_count);
960
26
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
32
                        uint32_t result, size_t input_rows_count) const override {
959
32
        return impl::execute(context, block, arguments, result, input_rows_count);
960
32
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
85
                        uint32_t result, size_t input_rows_count) const override {
959
85
        return impl::execute(context, block, arguments, result, input_rows_count);
960
85
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
1
                        uint32_t result, size_t input_rows_count) const override {
959
1
        return impl::execute(context, block, arguments, result, input_rows_count);
960
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
5
                        uint32_t result, size_t input_rows_count) const override {
959
5
        return impl::execute(context, block, arguments, result, input_rows_count);
960
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
6
                        uint32_t result, size_t input_rows_count) const override {
959
6
        return impl::execute(context, block, arguments, result, input_rows_count);
960
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
50
                        uint32_t result, size_t input_rows_count) const override {
959
50
        return impl::execute(context, block, arguments, result, input_rows_count);
960
50
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
41
                        uint32_t result, size_t input_rows_count) const override {
959
41
        return impl::execute(context, block, arguments, result, input_rows_count);
960
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
958
48
                        uint32_t result, size_t input_rows_count) const override {
959
48
        return impl::execute(context, block, arguments, result, input_rows_count);
960
48
    }
961
};
962
963
struct UnHexImplEmpty {
964
    static constexpr auto name = "unhex";
965
};
966
967
struct UnHexImplNull {
968
    static constexpr auto name = "unhex_null";
969
};
970
971
template <typename Name>
972
struct UnHexImpl {
973
    static constexpr auto name = Name::name;
974
    using ReturnType = DataTypeString;
975
    using ColumnType = ColumnString;
976
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
977
978
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
979
149
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
980
149
        auto rows_count = offsets.size();
981
149
        dst_offsets.resize(rows_count);
982
983
149
        int64_t total_size = 0;
984
346
        for (size_t i = 0; i < rows_count; i++) {
985
197
            size_t len = offsets[i] - offsets[i - 1];
986
197
            total_size += len / 2;
987
197
        }
988
149
        ColumnString::check_chars_length(total_size, rows_count);
989
149
        dst_data.resize(total_size);
990
149
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
991
149
        size_t offset = 0;
992
993
346
        for (int i = 0; i < rows_count; ++i) {
994
197
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
995
197
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
996
997
197
            if (UNLIKELY(srclen == 0)) {
998
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
999
13
                continue;
1000
13
            }
1001
1002
184
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1003
1004
184
            offset += outlen;
1005
184
            dst_offsets[i] = cast_set<uint32_t>(offset);
1006
184
        }
1007
149
        dst_data.pop_back(total_size - offset);
1008
149
        return Status::OK();
1009
149
    }
1010
1011
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1012
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1013
33
                         ColumnUInt8::Container* null_map_data) {
1014
33
        auto rows_count = offsets.size();
1015
33
        dst_offsets.resize(rows_count);
1016
1017
33
        int64_t total_size = 0;
1018
84
        for (size_t i = 0; i < rows_count; i++) {
1019
51
            size_t len = offsets[i] - offsets[i - 1];
1020
51
            total_size += len / 2;
1021
51
        }
1022
33
        ColumnString::check_chars_length(total_size, rows_count);
1023
33
        dst_data.resize(total_size);
1024
33
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1025
33
        size_t offset = 0;
1026
1027
84
        for (int i = 0; i < rows_count; ++i) {
1028
51
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1029
51
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1030
1031
51
            if (UNLIKELY(srclen == 0)) {
1032
7
                (*null_map_data)[i] = 1;
1033
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1034
7
                continue;
1035
7
            }
1036
1037
44
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1038
1039
44
            if (outlen == 0) {
1040
13
                (*null_map_data)[i] = 1;
1041
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1042
13
                continue;
1043
13
            }
1044
1045
31
            offset += outlen;
1046
31
            dst_offsets[i] = cast_set<uint32_t>(offset);
1047
31
        }
1048
33
        dst_data.pop_back(total_size - offset);
1049
33
        return Status::OK();
1050
33
    }
1051
};
1052
1053
struct NameStringSpace {
1054
    static constexpr auto name = "space";
1055
};
1056
1057
struct StringSpace {
1058
    using ReturnType = DataTypeString;
1059
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_INT;
1060
    using Type = Int32;
1061
    using ReturnColumnType = ColumnString;
1062
1063
    static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data,
1064
10
                         ColumnString::Offsets& res_offsets) {
1065
10
        res_offsets.resize(data.size());
1066
10
        size_t input_size = res_offsets.size();
1067
10
        int64_t total_size = 0;
1068
34
        for (size_t i = 0; i < input_size; ++i) {
1069
24
            if (data[i] > 0) {
1070
14
                total_size += data[i];
1071
14
            }
1072
24
        }
1073
10
        ColumnString::check_chars_length(total_size, input_size);
1074
10
        res_data.reserve(total_size);
1075
1076
34
        for (size_t i = 0; i < input_size; ++i) {
1077
24
            if (data[i] > 0) [[likely]] {
1078
14
                res_data.resize_fill(res_data.size() + data[i], ' ');
1079
14
                cast_set(res_offsets[i], res_data.size());
1080
14
            } else {
1081
10
                StringOP::push_empty_string(i, res_data, res_offsets);
1082
10
            }
1083
24
        }
1084
10
        return Status::OK();
1085
10
    }
1086
};
1087
1088
struct ToBase64Impl {
1089
    static constexpr auto name = "to_base64";
1090
    using ReturnType = DataTypeString;
1091
    using ColumnType = ColumnString;
1092
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1093
1094
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1095
107
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1096
107
        auto rows_count = offsets.size();
1097
107
        dst_offsets.resize(rows_count);
1098
1099
107
        size_t total_size = 0;
1100
250
        for (size_t i = 0; i < rows_count; i++) {
1101
143
            size_t len = offsets[i] - offsets[i - 1];
1102
143
            total_size += 4 * ((len + 2) / 3);
1103
143
        }
1104
107
        ColumnString::check_chars_length(total_size, rows_count);
1105
107
        dst_data.resize(total_size);
1106
107
        auto* dst_data_ptr = dst_data.data();
1107
107
        size_t offset = 0;
1108
1109
250
        for (int i = 0; i < rows_count; ++i) {
1110
143
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1111
143
            size_t srclen = offsets[i] - offsets[i - 1];
1112
1113
143
            if (UNLIKELY(srclen == 0)) {
1114
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1115
7
                continue;
1116
7
            }
1117
1118
136
            auto outlen = doris::base64_encode((const unsigned char*)source, srclen,
1119
136
                                               (unsigned char*)(dst_data_ptr + offset));
1120
1121
136
            offset += outlen;
1122
136
            dst_offsets[i] = cast_set<uint32_t>(offset);
1123
136
        }
1124
107
        dst_data.pop_back(total_size - offset);
1125
107
        return Status::OK();
1126
107
    }
1127
};
1128
1129
struct FromBase64Impl {
1130
    static constexpr auto name = "from_base64";
1131
    using ReturnType = DataTypeString;
1132
    using ColumnType = ColumnString;
1133
1134
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1135
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1136
109
                         NullMap& null_map) {
1137
109
        auto rows_count = offsets.size();
1138
109
        dst_offsets.resize(rows_count);
1139
1140
109
        size_t total_size = 0;
1141
271
        for (size_t i = 0; i < rows_count; i++) {
1142
162
            auto len = offsets[i] - offsets[i - 1];
1143
162
            total_size += len / 4 * 3;
1144
162
        }
1145
109
        ColumnString::check_chars_length(total_size, rows_count);
1146
109
        dst_data.resize(total_size);
1147
109
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1148
109
        size_t offset = 0;
1149
1150
271
        for (int i = 0; i < rows_count; ++i) {
1151
162
            if (UNLIKELY(null_map[i])) {
1152
0
                null_map[i] = 1;
1153
0
                dst_offsets[i] = cast_set<uint32_t>(offset);
1154
0
                continue;
1155
0
            }
1156
1157
162
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1158
162
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1159
1160
162
            if (UNLIKELY(srclen == 0)) {
1161
6
                dst_offsets[i] = cast_set<uint32_t>(offset);
1162
6
                continue;
1163
6
            }
1164
1165
156
            auto outlen = base64_decode(source, srclen, dst_data_ptr + offset);
1166
1167
156
            if (outlen < 0) {
1168
60
                null_map[i] = 1;
1169
60
                dst_offsets[i] = cast_set<uint32_t>(offset);
1170
96
            } else {
1171
96
                offset += outlen;
1172
96
                dst_offsets[i] = cast_set<uint32_t>(offset);
1173
96
            }
1174
156
        }
1175
109
        dst_data.pop_back(total_size - offset);
1176
109
        return Status::OK();
1177
109
    }
1178
};
1179
1180
struct StringAppendTrailingCharIfAbsent {
1181
    static constexpr auto name = "append_trailing_char_if_absent";
1182
    using Chars = ColumnString::Chars;
1183
    using Offsets = ColumnString::Offsets;
1184
    using ReturnType = DataTypeString;
1185
    using ColumnType = ColumnString;
1186
1187
48
    static bool str_end_with(const StringRef& str, const StringRef& end) {
1188
48
        if (str.size < end.size) {
1189
11
            return false;
1190
11
        }
1191
        // The end_with method of StringRef needs to ensure that the size of end is less than or equal to the size of str.
1192
37
        return str.end_with(end);
1193
48
    }
1194
1195
    static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1196
                              const Chars& rdata, const Offsets& roffsets, Chars& res_data,
1197
56
                              Offsets& res_offsets, NullMap& null_map_data) {
1198
56
        DCHECK_EQ(loffsets.size(), roffsets.size());
1199
56
        size_t input_rows_count = loffsets.size();
1200
56
        res_offsets.resize(input_rows_count);
1201
56
        fmt::memory_buffer buffer;
1202
1203
158
        for (size_t i = 0; i < input_rows_count; ++i) {
1204
102
            buffer.clear();
1205
1206
102
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1207
102
                                       loffsets[i] - loffsets[i - 1]);
1208
102
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1209
102
                                       roffsets[i] - roffsets[i - 1]);
1210
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1211
102
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1212
102
                    rstr.begin(), rstr.end(), 2);
1213
1214
102
            if (char_len != 1) {
1215
66
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1216
66
                continue;
1217
66
            }
1218
36
            if (str_end_with(lstr, rstr)) {
1219
9
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1220
9
                continue;
1221
9
            }
1222
1223
27
            buffer.append(lstr.begin(), lstr.end());
1224
27
            buffer.append(rstr.begin(), rstr.end());
1225
27
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1226
27
                                        res_offsets);
1227
27
        }
1228
56
    }
1229
    static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1230
                              const StringRef& rstr, Chars& res_data, Offsets& res_offsets,
1231
8
                              NullMap& null_map_data) {
1232
8
        size_t input_rows_count = loffsets.size();
1233
8
        res_offsets.resize(input_rows_count);
1234
8
        fmt::memory_buffer buffer;
1235
        // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1236
8
        auto [byte_len, char_len] =
1237
8
                simd::VStringFunctions::iterate_utf8_with_limit_length(rstr.begin(), rstr.end(), 2);
1238
8
        if (char_len != 1) {
1239
4
            for (size_t i = 0; i < input_rows_count; ++i) {
1240
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1241
2
            }
1242
2
            return;
1243
2
        }
1244
1245
12
        for (size_t i = 0; i < input_rows_count; ++i) {
1246
6
            buffer.clear();
1247
6
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1248
6
                                       loffsets[i] - loffsets[i - 1]);
1249
1250
6
            if (str_end_with(lstr, rstr)) {
1251
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1252
2
                continue;
1253
2
            }
1254
1255
4
            buffer.append(lstr.begin(), lstr.end());
1256
4
            buffer.append(rstr.begin(), rstr.end());
1257
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1258
4
                                        res_offsets);
1259
4
        }
1260
6
    }
1261
    static void scalar_vector(FunctionContext* context, const StringRef& lstr, const Chars& rdata,
1262
                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
1263
8
                              NullMap& null_map_data) {
1264
8
        size_t input_rows_count = roffsets.size();
1265
8
        res_offsets.resize(input_rows_count);
1266
8
        fmt::memory_buffer buffer;
1267
1268
16
        for (size_t i = 0; i < input_rows_count; ++i) {
1269
8
            buffer.clear();
1270
1271
8
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1272
8
                                       roffsets[i] - roffsets[i - 1]);
1273
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1274
8
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1275
8
                    rstr.begin(), rstr.end(), 2);
1276
1277
8
            if (char_len != 1) {
1278
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1279
2
                continue;
1280
2
            }
1281
6
            if (str_end_with(lstr, rstr)) {
1282
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1283
2
                continue;
1284
2
            }
1285
1286
4
            buffer.append(lstr.begin(), lstr.end());
1287
4
            buffer.append(rstr.begin(), rstr.end());
1288
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1289
4
                                        res_offsets);
1290
4
        }
1291
8
    }
1292
};
1293
1294
struct StringLPad {
1295
    static constexpr auto name = "lpad";
1296
    static constexpr auto is_lpad = true;
1297
};
1298
1299
struct StringRPad {
1300
    static constexpr auto name = "rpad";
1301
    static constexpr auto is_lpad = false;
1302
};
1303
1304
template <typename LeftDataType, typename RightDataType>
1305
using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;
1306
1307
template <typename LeftDataType, typename RightDataType>
1308
using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;
1309
1310
template <typename LeftDataType, typename RightDataType>
1311
using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
1312
1313
// ready for regist function
1314
using FunctionStringParseDataSize = FunctionUnaryToType<ParseDataSize, NameParseDataSize>;
1315
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
1316
using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLength>;
1317
using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
1318
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
1319
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
1320
using FunctionStringStartsWith =
1321
        FunctionBinaryToType<DataTypeString, DataTypeString, StringStartsWithImpl, NameStartsWith>;
1322
using FunctionStringEndsWith =
1323
        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
1324
using FunctionStringInstr =
1325
        FunctionBinaryToType<DataTypeString, DataTypeString, StringInStrImpl, NameInstr>;
1326
using FunctionStringLocate =
1327
        FunctionBinaryToType<DataTypeString, DataTypeString, StringLocateImpl, NameLocate>;
1328
using FunctionStringFindInSet =
1329
        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;
1330
1331
using FunctionQuote = FunctionStringToString<NameQuoteImpl, NameQuote>;
1332
1333
using FunctionToLower = FunctionStringToString<TransferImpl<NameToLower>, NameToLower>;
1334
1335
using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameToUpper>;
1336
1337
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
1338
1339
using FunctionUnHex = FunctionStringEncode<UnHexImpl<UnHexImplEmpty>, false>;
1340
using FunctionUnHexNullable = FunctionStringEncode<UnHexImpl<UnHexImplNull>, true>;
1341
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>;
1342
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
1343
1344
using FunctionStringAppendTrailingCharIfAbsent =
1345
        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;
1346
1347
using FunctionStringLPad = FunctionStringPad<StringLPad>;
1348
using FunctionStringRPad = FunctionStringPad<StringRPad>;
1349
1350
extern void register_function_string_basic(SimpleFunctionFactory& factory);
1351
extern void register_function_string_digest(SimpleFunctionFactory& factory);
1352
extern void register_function_string_mask(SimpleFunctionFactory& factory);
1353
extern void register_function_string_misc(SimpleFunctionFactory& factory);
1354
extern void register_function_string_search(SimpleFunctionFactory& factory);
1355
extern void register_function_string_url(SimpleFunctionFactory& factory);
1356
1357
6
void register_function_string(SimpleFunctionFactory& factory) {
1358
6
    register_function_string_basic(factory);
1359
6
    register_function_string_digest(factory);
1360
6
    register_function_string_mask(factory);
1361
6
    register_function_string_misc(factory);
1362
6
    register_function_string_search(factory);
1363
6
    register_function_string_url(factory);
1364
1365
6
    factory.register_function<FunctionStringParseDataSize>();
1366
6
    factory.register_function<FunctionStringASCII>();
1367
6
    factory.register_function<FunctionStringLength>();
1368
6
    factory.register_function<FunctionCrc32>();
1369
6
    factory.register_function<FunctionStringUTF8Length>();
1370
6
    factory.register_function<FunctionStringSpace>();
1371
6
    factory.register_function<FunctionStringStartsWith>();
1372
6
    factory.register_function<FunctionStringEndsWith>();
1373
6
    factory.register_function<FunctionStringInstr>();
1374
6
    factory.register_function<FunctionStringFindInSet>();
1375
6
    factory.register_function<FunctionStringLocate>();
1376
6
    factory.register_function<FunctionQuote>();
1377
6
    factory.register_function<FunctionReverseCommon>();
1378
6
    factory.register_function<FunctionUnHex>();
1379
6
    factory.register_function<FunctionUnHexNullable>();
1380
6
    factory.register_function<FunctionToLower>();
1381
6
    factory.register_function<FunctionToUpper>();
1382
6
    factory.register_function<FunctionToInitcap>();
1383
6
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrim>>>();
1384
6
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrim>>>();
1385
6
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrim>>>();
1386
6
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrim>>>();
1387
6
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrim>>>();
1388
6
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrim>>>();
1389
6
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrimIn>>>();
1390
6
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrimIn>>>();
1391
6
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrimIn>>>();
1392
6
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrimIn>>>();
1393
6
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrimIn>>>();
1394
6
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrimIn>>>();
1395
6
    factory.register_function<FunctionStringConcat>();
1396
6
    factory.register_function<FunctionStringElt>();
1397
6
    factory.register_function<FunctionStringConcatWs>();
1398
6
    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
1399
6
    factory.register_function<FunctionStringRepeat>();
1400
6
    factory.register_function<FunctionStringLPad>();
1401
6
    factory.register_function<FunctionStringRPad>();
1402
6
    factory.register_function<FunctionToBase64>();
1403
6
    factory.register_function<FunctionFromBase64>();
1404
6
    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
1405
6
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
1406
6
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
1407
6
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMALV2>>>();
1408
6
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL32>>>();
1409
6
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL64>>>();
1410
6
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL128I>>>();
1411
6
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL256>>>();
1412
6
    factory.register_function<FunctionStringFormatRound<FormatRoundDoubleImpl>>();
1413
6
    factory.register_function<FunctionStringFormatRound<FormatRoundInt64Impl>>();
1414
6
    factory.register_function<FunctionStringFormatRound<FormatRoundInt128Impl>>();
1415
6
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMALV2>>>();
1416
6
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL32>>>();
1417
6
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL64>>>();
1418
6
    factory.register_function<
1419
6
            FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL128I>>>();
1420
6
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL256>>>();
1421
6
    factory.register_function<FunctionReplace<ReplaceImpl, true>>();
1422
6
    factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
1423
6
    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
1424
6
    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
1425
6
    factory.register_function<FunctionOverlay>();
1426
1427
6
    factory.register_alias(FunctionToLower::name, "lcase");
1428
6
    factory.register_alias(FunctionToUpper::name, "ucase");
1429
6
    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
1430
6
    factory.register_alias(FunctionStringLength::name, "octet_length");
1431
6
    factory.register_alias(FunctionOverlay::name, "insert");
1432
6
}
1433
1434
} // namespace doris