Coverage Report

Created: 2026-03-24 14:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/function/function_string.h"
19
20
#include <ctype.h>
21
#include <math.h>
22
#include <re2/stringpiece.h>
23
#include <unicode/schriter.h>
24
#include <unicode/uchar.h>
25
#include <unicode/unistr.h>
26
#include <unicode/ustream.h>
27
28
#include <bitset>
29
#include <cstddef>
30
#include <cstdint>
31
#include <string_view>
32
33
#include "common/cast_set.h"
34
#include "common/status.h"
35
#include "core/column/column.h"
36
#include "core/column/column_string.h"
37
#include "core/pod_array_fwd.h"
38
#include "core/string_ref.h"
39
#include "exprs/function/function_reverse.h"
40
#include "exprs/function/function_string_to_string.h"
41
#include "exprs/function/function_totype.h"
42
#include "exprs/function/simple_function_factory.h"
43
#include "exprs/function/string_hex_util.h"
44
#include "util/string_search.hpp"
45
#include "util/url_coding.h"
46
47
namespace doris {
48
#include "common/compile_check_begin.h"
49
// Tag type whose `name` member holds the function name "ascii".
struct NameStringASCII {
    static constexpr const char* name = "ascii";
};
52
53
struct StringASCII {
54
    using ReturnType = DataTypeInt32;
55
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
56
    using Type = String;
57
    using ReturnColumnType = ColumnInt32;
58
59
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
60
55
                         PaddedPODArray<Int32>& res) {
61
55
        auto size = offsets.size();
62
55
        res.resize(size);
63
165
        for (int i = 0; i < size; ++i) {
64
110
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
65
110
            res[i] = (offsets[i] == offsets[i - 1]) ? 0 : static_cast<uint8_t>(raw_str[0]);
66
110
        }
67
55
        return Status::OK();
68
55
    }
69
};
70
71
// Tag type whose `name` member holds the function name "parse_data_size".
struct NameParseDataSize {
    static constexpr const char* name = "parse_data_size";
};
74
75
static const std::map<std::string_view, Int128> UNITS = {
76
        {"B", static_cast<Int128>(1)},        {"kB", static_cast<Int128>(1) << 10},
77
        {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30},
78
        {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50},
79
        {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70},
80
        {"YB", static_cast<Int128>(1) << 80}};
81
82
struct ParseDataSize {
83
    using ReturnType = DataTypeInt128;
84
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
85
    using Type = String;
86
    using ReturnColumnType = ColumnInt128;
87
88
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
89
48
                         PaddedPODArray<Int128>& res) {
90
48
        auto size = offsets.size();
91
48
        res.resize(size);
92
100
        for (int i = 0; i < size; ++i) {
93
52
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
94
52
            int str_size = offsets[i] - offsets[i - 1];
95
52
            res[i] = parse_data_size(std::string_view(raw_str, str_size));
96
52
        }
97
48
        return Status::OK();
98
48
    }
99
100
52
    static Int128 parse_data_size(const std::string_view& dataSize) {
101
52
        int digit_length = 0;
102
216
        for (char c : dataSize) {
103
216
            if (isdigit(c) || c == '.') {
104
166
                digit_length++;
105
166
            } else {
106
50
                break;
107
50
            }
108
216
        }
109
110
52
        if (digit_length == 0) {
111
4
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
112
4
                                   "Invalid Input argument \"{}\" of function parse_data_size",
113
4
                                   dataSize);
114
4
        }
115
        // 123.45MB--->123.45 : MB
116
48
        double value = 0.0;
117
48
        try {
118
48
            value = std::stod(std::string(dataSize.substr(0, digit_length)));
119
48
        } catch (const std::exception& e) {
120
0
            throw doris::Exception(
121
0
                    ErrorCode::INVALID_ARGUMENT,
122
0
                    "Invalid Input argument \"{}\" of function parse_data_size, error: {}",
123
0
                    dataSize, e.what());
124
0
        }
125
48
        auto unit = dataSize.substr(digit_length);
126
48
        auto it = UNITS.find(unit);
127
48
        if (it != UNITS.end()) {
128
45
            return static_cast<__int128>(static_cast<long double>(it->second) * value);
129
45
        } else {
130
3
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
131
3
                                   "Invalid Input argument \"{}\" of function parse_data_size",
132
3
                                   dataSize);
133
3
        }
134
48
    }
135
};
136
137
// Tag type whose `name` member holds the function name "quote".
struct NameQuote {
    static constexpr const char* name = "quote";
};
140
141
struct NameQuoteImpl {
142
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
143
17
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
144
17
        size_t offset_size = offsets.size();
145
17
        ColumnString::Offset pos = 0;
146
17
        res_offsets.resize(offset_size);
147
17
        res_data.resize(data.size() + offset_size * 2);
148
45
        for (int i = 0; i < offset_size; i++) {
149
28
            const unsigned char* raw_str = &data[offsets[i - 1]];
150
28
            ColumnString::Offset size = offsets[i] - offsets[i - 1];
151
28
            res_data[pos] = '\'';
152
28
            std::memcpy(res_data.data() + pos + 1, raw_str, size);
153
28
            res_data[pos + size + 1] = '\'';
154
28
            pos += size + 2;
155
28
            res_offsets[i] = pos;
156
28
        }
157
17
        return Status::OK();
158
17
    }
159
};
160
161
// Tag type whose `name` member holds the function name "length".
struct NameStringLength {
    static constexpr const char* name = "length";
};
164
165
struct StringLengthImpl {
166
    using ReturnType = DataTypeInt32;
167
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
168
    using Type = String;
169
    using ReturnColumnType = ColumnInt32;
170
171
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
172
9.32k
                         PaddedPODArray<Int32>& res) {
173
9.32k
        auto size = offsets.size();
174
9.32k
        res.resize(size);
175
5.05M
        for (int i = 0; i < size; ++i) {
176
5.05M
            int str_size = offsets[i] - offsets[i - 1];
177
5.05M
            res[i] = str_size;
178
5.05M
        }
179
9.32k
        return Status::OK();
180
9.32k
    }
181
};
182
183
// Tag type whose `name` member holds the function name "crc32".
struct NameCrc32 {
    static constexpr const char* name = "crc32";
};
186
187
struct Crc32Impl {
188
    using ReturnType = DataTypeInt64;
189
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
190
    using Type = String;
191
    using ReturnColumnType = ColumnInt64;
192
193
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
194
3
                         PaddedPODArray<Int64>& res) {
195
3
        auto size = offsets.size();
196
3
        res.resize(size);
197
6
        for (int i = 0; i < size; ++i) {
198
3
            res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1],
199
3
                             offsets[i] - offsets[i - 1]);
200
3
        }
201
3
        return Status::OK();
202
3
    }
203
};
204
205
// Tag type whose `name` member holds the function name "char_length".
struct NameStringUtf8Length {
    static constexpr const char* name = "char_length";
};
208
209
struct StringUtf8LengthImpl {
210
    using ReturnType = DataTypeInt32;
211
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
212
    using Type = String;
213
    using ReturnColumnType = ColumnInt32;
214
215
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
216
53
                         PaddedPODArray<Int32>& res) {
217
53
        auto size = offsets.size();
218
53
        res.resize(size);
219
165
        for (int i = 0; i < size; ++i) {
220
112
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
221
112
            int str_size = offsets[i] - offsets[i - 1];
222
112
            res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size);
223
112
        }
224
53
        return Status::OK();
225
53
    }
226
};
227
228
// Tag type whose `name` member holds the function name "starts_with".
struct NameStartsWith {
    static constexpr const char* name = "starts_with";
};
231
232
struct StartsWithOp {
233
    using ResultDataType = DataTypeUInt8;
234
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
235
236
152
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
237
152
        res = strl.starts_with(strr);
238
152
    }
239
};
240
241
// Tag type whose `name` member holds the function name "ends_with".
struct NameEndsWith {
    static constexpr const char* name = "ends_with";
};
244
245
struct EndsWithOp {
246
    using ResultDataType = DataTypeUInt8;
247
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
248
249
154
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
250
154
        res = strl.ends_with(strr);
251
154
    }
252
};
253
254
// Tag type whose `name` member holds the function name "find_in_set".
struct NameFindInSet {
    static constexpr const char* name = "find_in_set";
};
257
258
struct FindInSetOp {
259
    using ResultDataType = DataTypeInt32;
260
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
261
180
    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
262
680
        for (const auto& c : strl) {
263
680
            if (c == ',') {
264
21
                res = 0;
265
21
                return;
266
21
            }
267
680
        }
268
269
159
        int32_t token_index = 1;
270
159
        int32_t start = 0;
271
159
        int32_t end;
272
273
305
        do {
274
305
            end = start;
275
            // Position end.
276
1.16k
            while (end < strr.length() && strr[end] != ',') {
277
858
                ++end;
278
858
            }
279
280
305
            if (strl == std::string_view {strr.data() + start, (size_t)end - start}) {
281
101
                res = token_index;
282
101
                return;
283
101
            }
284
285
            // Re-position start and end past ','
286
204
            start = end + 1;
287
204
            ++token_index;
288
204
        } while (start < strr.length());
289
58
        res = 0;
290
58
    }
291
};
292
293
// Tag type whose `name` member holds the function name "instr".
struct NameInstr {
    static constexpr const char* name = "instr";
};
296
297
// LeftDataType and RightDataType are DataTypeString
298
template <typename LeftDataType, typename RightDataType>
299
struct StringInStrImpl {
300
    using ResultDataType = DataTypeInt32;
301
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
302
303
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
304
72
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
305
72
        StringRef lstr_ref(ldata.data, ldata.size);
306
307
72
        auto size = roffsets.size();
308
72
        res.resize(size);
309
144
        for (int i = 0; i < size; ++i) {
310
72
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
311
72
            int r_str_size = roffsets[i] - roffsets[i - 1];
312
313
72
            StringRef rstr_ref(r_raw_str, r_str_size);
314
315
72
            res[i] = execute(lstr_ref, rstr_ref);
316
72
        }
317
318
72
        return Status::OK();
319
72
    }
320
321
    static Status vector_scalar(const ColumnString::Chars& ldata,
322
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
323
103
                                ResultPaddedPODArray& res) {
324
103
        auto size = loffsets.size();
325
103
        res.resize(size);
326
327
103
        if (rdata.size == 0) {
328
12
            std::fill(res.begin(), res.end(), 1);
329
12
            return Status::OK();
330
12
        }
331
332
91
        const UInt8* begin = ldata.data();
333
91
        const UInt8* end = begin + ldata.size();
334
91
        const UInt8* pos = begin;
335
336
        /// Current index in the array of strings.
337
91
        size_t i = 0;
338
91
        std::fill(res.begin(), res.end(), 0);
339
340
91
        StringRef rstr_ref(rdata.data, rdata.size);
341
91
        StringSearch search(&rstr_ref);
342
343
120
        while (pos < end) {
344
            // search return matched substring start offset
345
93
            pos = (UInt8*)search.search((char*)pos, end - pos);
346
93
            if (pos >= end) {
347
64
                break;
348
64
            }
349
350
            /// Determine which index it refers to.
351
            /// begin + value_offsets[i] is the start offset of string at i+1
352
32
            while (begin + loffsets[i] < pos) {
353
3
                ++i;
354
3
            }
355
356
            /// We check that the entry does not pass through the boundaries of strings.
357
29
            if (pos + rdata.size <= begin + loffsets[i]) {
358
28
                int loc = (int)(pos - begin) - loffsets[i - 1];
359
28
                int l_str_size = loffsets[i] - loffsets[i - 1];
360
28
                auto len = std::min(l_str_size, loc);
361
28
                loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len);
362
28
                res[i] = loc + 1;
363
28
            }
364
365
            // move to next string offset
366
29
            pos = begin + loffsets[i];
367
29
            ++i;
368
29
        }
369
370
91
        return Status::OK();
371
103
    }
372
373
    static Status vector_vector(const ColumnString::Chars& ldata,
374
                                const ColumnString::Offsets& loffsets,
375
                                const ColumnString::Chars& rdata,
376
207
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
377
207
        DCHECK_EQ(loffsets.size(), roffsets.size());
378
379
207
        auto size = loffsets.size();
380
207
        res.resize(size);
381
661
        for (int i = 0; i < size; ++i) {
382
454
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
383
454
            int l_str_size = loffsets[i] - loffsets[i - 1];
384
454
            StringRef lstr_ref(l_raw_str, l_str_size);
385
386
454
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
387
454
            int r_str_size = roffsets[i] - roffsets[i - 1];
388
454
            StringRef rstr_ref(r_raw_str, r_str_size);
389
390
454
            res[i] = execute(lstr_ref, rstr_ref);
391
454
        }
392
393
207
        return Status::OK();
394
207
    }
395
396
526
    static int execute(const StringRef& strl, const StringRef& strr) {
397
526
        if (strr.size == 0) {
398
71
            return 1;
399
71
        }
400
401
455
        StringSearch search(&strr);
402
        // Hive returns positions starting from 1.
403
455
        int loc = search.search(&strl);
404
455
        if (loc > 0) {
405
43
            int len = std::min(loc, (int)strl.size);
406
43
            loc = simd::VStringFunctions::get_char_len(strl.data, len);
407
43
        }
408
409
455
        return loc + 1;
410
526
    }
411
};
412
413
// locate uses the same implementation as instr, with the two string arguments swapped
414
// Tag type whose `name` member holds the function name "locate".
struct NameLocate {
    static constexpr const char* name = "locate";
};
417
418
// LeftDataType and RightDataType are DataTypeString
419
template <typename LeftDataType, typename RightDataType>
420
struct StringLocateImpl {
421
    using ResultDataType = DataTypeInt32;
422
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
423
424
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
425
39
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
426
39
        return StringInStrImpl<LeftDataType, RightDataType>::vector_scalar(rdata, roffsets, ldata,
427
39
                                                                           res);
428
39
    }
429
430
    static Status vector_scalar(const ColumnString::Chars& ldata,
431
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
432
36
                                ResultPaddedPODArray& res) {
433
36
        return StringInStrImpl<LeftDataType, RightDataType>::scalar_vector(rdata, ldata, loffsets,
434
36
                                                                           res);
435
36
    }
436
437
    static Status vector_vector(const ColumnString::Chars& ldata,
438
                                const ColumnString::Offsets& loffsets,
439
                                const ColumnString::Chars& rdata,
440
126
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
441
126
        return StringInStrImpl<LeftDataType, RightDataType>::vector_vector(rdata, roffsets, ldata,
442
126
                                                                           loffsets, res);
443
126
    }
444
};
445
446
// LeftDataType and RightDataType are DataTypeString
447
template <typename LeftDataType, typename RightDataType, typename OP>
448
struct StringFunctionImpl {
449
    using ResultDataType = typename OP::ResultDataType;
450
    using ResultPaddedPODArray = typename OP::ResultPaddedPODArray;
451
452
    // Applies OP::execute to each pair of rows of two string columns.
    // Both columns are expected to have the same number of rows (DCHECK only).
    // `res` is resized to the row count; always returns Status::OK().
    static Status vector_vector(const ColumnString::Chars& ldata,
                                const ColumnString::Offsets& loffsets,
                                const ColumnString::Chars& rdata,
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
        DCHECK_EQ(loffsets.size(), roffsets.size());

        const auto row_count = loffsets.size();
        res.resize(row_count);
        for (int row = 0; row < row_count; ++row) {
            // Row i of a column occupies [offsets[i - 1], offsets[i]) in its byte array.
            const auto l_begin = loffsets[row - 1];
            const int l_size = loffsets[row] - l_begin;
            const auto r_begin = roffsets[row - 1];
            const int r_size = roffsets[row] - r_begin;

            const std::string_view left(reinterpret_cast<const char*>(&ldata[l_begin]), l_size);
            const std::string_view right(reinterpret_cast<const char*>(&rdata[r_begin]), r_size);

            OP::execute(left, right, res[row]);
        }
        return Status::OK();
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
455
90
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
456
90
        DCHECK_EQ(loffsets.size(), roffsets.size());
457
458
90
        auto size = loffsets.size();
459
90
        res.resize(size);
460
219
        for (int i = 0; i < size; ++i) {
461
129
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
462
129
            int l_str_size = loffsets[i] - loffsets[i - 1];
463
464
129
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
465
129
            int r_str_size = roffsets[i] - roffsets[i - 1];
466
467
129
            std::string_view lview(l_raw_str, l_str_size);
468
129
            std::string_view rview(r_raw_str, r_str_size);
469
470
129
            OP::execute(lview, rview, res[i]);
471
129
        }
472
90
        return Status::OK();
473
90
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
455
61
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
456
61
        DCHECK_EQ(loffsets.size(), roffsets.size());
457
458
61
        auto size = loffsets.size();
459
61
        res.resize(size);
460
175
        for (int i = 0; i < size; ++i) {
461
114
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
462
114
            int l_str_size = loffsets[i] - loffsets[i - 1];
463
464
114
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
465
114
            int r_str_size = roffsets[i] - roffsets[i - 1];
466
467
114
            std::string_view lview(l_raw_str, l_str_size);
468
114
            std::string_view rview(r_raw_str, r_str_size);
469
470
114
            OP::execute(lview, rview, res[i]);
471
114
        }
472
61
        return Status::OK();
473
61
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
455
64
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
456
64
        DCHECK_EQ(loffsets.size(), roffsets.size());
457
458
64
        auto size = loffsets.size();
459
64
        res.resize(size);
460
186
        for (int i = 0; i < size; ++i) {
461
122
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
462
122
            int l_str_size = loffsets[i] - loffsets[i - 1];
463
464
122
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
465
122
            int r_str_size = roffsets[i] - roffsets[i - 1];
466
467
122
            std::string_view lview(l_raw_str, l_str_size);
468
122
            std::string_view rview(r_raw_str, r_str_size);
469
470
122
            OP::execute(lview, rview, res[i]);
471
122
        }
472
64
        return Status::OK();
473
64
    }
474
    // Applies OP::execute to each row of the left column paired with the
    // constant right-hand string `rdata`.
    // `res` is resized to the row count; always returns Status::OK().
    static Status vector_scalar(const ColumnString::Chars& ldata,
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
                                ResultPaddedPODArray& res) {
        const auto row_count = loffsets.size();
        res.resize(row_count);

        // The right operand is the same for every row.
        const std::string_view right(rdata.data, rdata.size);
        for (int row = 0; row < row_count; ++row) {
            const auto l_begin = loffsets[row - 1];
            const int l_size = loffsets[row] - l_begin;
            const std::string_view left(reinterpret_cast<const char*>(&ldata[l_begin]), l_size);

            OP::execute(left, right, res[row]);
        }
        return Status::OK();
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
476
5
                                ResultPaddedPODArray& res) {
477
5
        auto size = loffsets.size();
478
5
        res.resize(size);
479
5
        std::string_view rview(rdata.data, rdata.size);
480
24
        for (int i = 0; i < size; ++i) {
481
19
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
482
19
            int l_str_size = loffsets[i] - loffsets[i - 1];
483
19
            std::string_view lview(l_raw_str, l_str_size);
484
485
19
            OP::execute(lview, rview, res[i]);
486
19
        }
487
5
        return Status::OK();
488
5
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
476
15
                                ResultPaddedPODArray& res) {
477
15
        auto size = loffsets.size();
478
15
        res.resize(size);
479
15
        std::string_view rview(rdata.data, rdata.size);
480
41
        for (int i = 0; i < size; ++i) {
481
26
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
482
26
            int l_str_size = loffsets[i] - loffsets[i - 1];
483
26
            std::string_view lview(l_raw_str, l_str_size);
484
485
26
            OP::execute(lview, rview, res[i]);
486
26
        }
487
15
        return Status::OK();
488
15
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
476
17
                                ResultPaddedPODArray& res) {
477
17
        auto size = loffsets.size();
478
17
        res.resize(size);
479
17
        std::string_view rview(rdata.data, rdata.size);
480
43
        for (int i = 0; i < size; ++i) {
481
26
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
482
26
            int l_str_size = loffsets[i] - loffsets[i - 1];
483
26
            std::string_view lview(l_raw_str, l_str_size);
484
485
26
            OP::execute(lview, rview, res[i]);
486
26
        }
487
17
        return Status::OK();
488
17
    }
489
    // Applies OP::execute to the constant left-hand string `ldata` paired with
    // each row of the right column.
    // `res` is resized to the row count; always returns Status::OK().
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
        const auto row_count = roffsets.size();
        res.resize(row_count);

        // The left operand is the same for every row.
        const std::string_view left(ldata.data, ldata.size);
        for (int row = 0; row < row_count; ++row) {
            const auto r_begin = roffsets[row - 1];
            const int r_size = roffsets[row] - r_begin;
            const std::string_view right(reinterpret_cast<const char*>(&rdata[r_begin]), r_size);

            OP::execute(left, right, res[row]);
        }
        return Status::OK();
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
490
4
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
491
4
        auto size = roffsets.size();
492
4
        res.resize(size);
493
4
        std::string_view lview(ldata.data, ldata.size);
494
8
        for (int i = 0; i < size; ++i) {
495
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
496
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
497
4
            std::string_view rview(r_raw_str, r_str_size);
498
499
4
            OP::execute(lview, rview, res[i]);
500
4
        }
501
4
        return Status::OK();
502
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
490
14
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
491
14
        auto size = roffsets.size();
492
14
        res.resize(size);
493
14
        std::string_view lview(ldata.data, ldata.size);
494
28
        for (int i = 0; i < size; ++i) {
495
14
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
496
14
            int r_str_size = roffsets[i] - roffsets[i - 1];
497
14
            std::string_view rview(r_raw_str, r_str_size);
498
499
14
            OP::execute(lview, rview, res[i]);
500
14
        }
501
14
        return Status::OK();
502
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERNS7_IiLm4096ESA_Lm16ELm15EEE
Line
Count
Source
490
26
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
491
26
        auto size = roffsets.size();
492
26
        res.resize(size);
493
26
        std::string_view lview(ldata.data, ldata.size);
494
58
        for (int i = 0; i < size; ++i) {
495
32
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
496
32
            int r_str_size = roffsets[i] - roffsets[i - 1];
497
32
            std::string_view rview(r_raw_str, r_str_size);
498
499
32
            OP::execute(lview, rview, res[i]);
500
32
        }
501
26
        return Status::OK();
502
26
    }
503
};
504
505
// Tag type whose `name` member holds the function name "lower".
struct NameToLower {
    static constexpr const char* name = "lower";
};
508
509
// Tag type whose `name` member holds the function name "upper".
struct NameToUpper {
    static constexpr const char* name = "upper";
};
512
513
template <typename OpName>
514
struct TransferImpl {
515
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
516
401
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
517
401
        size_t offset_size = offsets.size();
518
401
        if (UNLIKELY(!offset_size)) {
519
0
            return Status::OK();
520
0
        }
521
522
401
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
523
401
        res_offsets.resize(offset_size);
524
401
        if (is_ascii) {
525
342
            memcpy_small_allow_read_write_overflow15(
526
342
                    res_offsets.data(), offsets.data(),
527
342
                    offset_size * sizeof(ColumnString::Offsets::value_type));
528
529
342
            size_t data_length = data.size();
530
342
            res_data.resize(data_length);
531
342
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
532
96
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
533
246
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
534
246
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
535
246
            }
536
342
        } else {
537
59
            execute_utf8(data, offsets, res_data, res_offsets);
538
59
        }
539
540
401
        return Status::OK();
541
401
    }
_ZN5doris12TransferImplINS_11NameToLowerEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
516
267
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
517
267
        size_t offset_size = offsets.size();
518
267
        if (UNLIKELY(!offset_size)) {
519
0
            return Status::OK();
520
0
        }
521
522
267
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
523
267
        res_offsets.resize(offset_size);
524
267
        if (is_ascii) {
525
246
            memcpy_small_allow_read_write_overflow15(
526
246
                    res_offsets.data(), offsets.data(),
527
246
                    offset_size * sizeof(ColumnString::Offsets::value_type));
528
529
246
            size_t data_length = data.size();
530
246
            res_data.resize(data_length);
531
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
532
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
533
246
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
534
246
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
535
246
            }
536
246
        } else {
537
21
            execute_utf8(data, offsets, res_data, res_offsets);
538
21
        }
539
540
267
        return Status::OK();
541
267
    }
_ZN5doris12TransferImplINS_11NameToUpperEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
516
134
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
517
134
        size_t offset_size = offsets.size();
518
134
        if (UNLIKELY(!offset_size)) {
519
0
            return Status::OK();
520
0
        }
521
522
134
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
523
134
        res_offsets.resize(offset_size);
524
134
        if (is_ascii) {
525
96
            memcpy_small_allow_read_write_overflow15(
526
96
                    res_offsets.data(), offsets.data(),
527
96
                    offset_size * sizeof(ColumnString::Offsets::value_type));
528
529
96
            size_t data_length = data.size();
530
96
            res_data.resize(data_length);
531
96
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
532
96
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
533
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
534
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
535
            }
536
96
        } else {
537
38
            execute_utf8(data, offsets, res_data, res_offsets);
538
38
        }
539
540
134
        return Status::OK();
541
134
    }
542
543
    static void execute_utf8(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
544
60
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
545
60
        std::string result;
546
198
        for (int64_t i = 0; i < offsets.size(); ++i) {
547
138
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
548
138
            uint32_t size = offsets[i] - offsets[i - 1];
549
550
138
            result.clear();
551
138
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
552
91
                to_upper_utf8(begin, size, result);
553
91
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
554
47
                to_lower_utf8(begin, size, result);
555
47
            }
556
138
            StringOP::push_value_string(result, i, res_data, res_offsets);
557
138
        }
558
60
    }
_ZN5doris12TransferImplINS_11NameToLowerEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
544
21
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
545
21
        std::string result;
546
68
        for (int64_t i = 0; i < offsets.size(); ++i) {
547
47
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
548
47
            uint32_t size = offsets[i] - offsets[i - 1];
549
550
47
            result.clear();
551
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
552
                to_upper_utf8(begin, size, result);
553
47
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
554
47
                to_lower_utf8(begin, size, result);
555
47
            }
556
47
            StringOP::push_value_string(result, i, res_data, res_offsets);
557
47
        }
558
21
    }
_ZN5doris12TransferImplINS_11NameToUpperEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
544
39
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
545
39
        std::string result;
546
130
        for (int64_t i = 0; i < offsets.size(); ++i) {
547
91
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
548
91
            uint32_t size = offsets[i] - offsets[i - 1];
549
550
91
            result.clear();
551
91
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
552
91
                to_upper_utf8(begin, size, result);
553
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
554
                to_lower_utf8(begin, size, result);
555
            }
556
91
            StringOP::push_value_string(result, i, res_data, res_offsets);
557
91
        }
558
39
    }
559
560
91
    static void to_upper_utf8(const char* data, uint32_t size, std::string& result) {
561
91
        icu::StringPiece sp;
562
91
        sp.set(data, size);
563
91
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
564
91
        unicode_str.toUpper();
565
91
        unicode_str.toUTF8String(result);
566
91
    }
567
568
47
    static void to_lower_utf8(const char* data, uint32_t size, std::string& result) {
569
47
        icu::StringPiece sp;
570
47
        sp.set(data, size);
571
47
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
572
47
        unicode_str.toLower();
573
47
        unicode_str.toUTF8String(result);
574
47
    }
575
};
576
577
// Capitalize first letter
578
struct NameToInitcap {
579
    static constexpr auto name = "initcap";
580
};
581
582
struct InitcapImpl {
583
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
584
173
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
585
173
        res_offsets.resize(offsets.size());
586
587
173
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
588
173
        if (is_ascii) {
589
115
            impl_vectors_ascii(data, offsets, res_data, res_offsets);
590
115
        } else {
591
58
            impl_vectors_utf8(data, offsets, res_data, res_offsets);
592
58
        }
593
173
        return Status::OK();
594
173
    }
595
596
    static void impl_vectors_ascii(const ColumnString::Chars& data,
597
                                   const ColumnString::Offsets& offsets,
598
                                   ColumnString::Chars& res_data,
599
115
                                   ColumnString::Offsets& res_offsets) {
600
115
        size_t offset_size = offsets.size();
601
115
        memcpy_small_allow_read_write_overflow15(
602
115
                res_offsets.data(), offsets.data(),
603
115
                offset_size * sizeof(ColumnString::Offsets::value_type));
604
605
115
        size_t data_length = data.size();
606
115
        res_data.resize(data_length);
607
115
        simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
608
609
115
        bool need_capitalize = true;
610
247
        for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) {
611
132
            auto end_index = res_offsets[offset_index];
612
132
            need_capitalize = true;
613
614
1.56k
            for (size_t i = start_index; i < end_index; ++i) {
615
1.43k
                if (!::isalnum(res_data[i])) {
616
216
                    need_capitalize = true;
617
1.21k
                } else if (need_capitalize) {
618
                    /*
619
                    https://en.cppreference.com/w/cpp/string/byte/toupper
620
                    Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. 
621
                    To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char:
622
                    char my_toupper(char ch)
623
                    {
624
                        return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
625
                    }
626
                    */
627
267
                    res_data[i] = static_cast<unsigned char>(::toupper(res_data[i]));
628
267
                    need_capitalize = false;
629
267
                }
630
1.43k
            }
631
632
132
            start_index = end_index;
633
132
        }
634
115
    }
635
636
    static void impl_vectors_utf8(const ColumnString::Chars& data,
637
                                  const ColumnString::Offsets& offsets,
638
                                  ColumnString::Chars& res_data,
639
58
                                  ColumnString::Offsets& res_offsets) {
640
58
        std::string result;
641
123
        for (int64_t i = 0; i < offsets.size(); ++i) {
642
65
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
643
65
            uint32_t size = offsets[i] - offsets[i - 1];
644
65
            result.clear();
645
65
            to_initcap_utf8(begin, size, result);
646
65
            StringOP::push_value_string(result, i, res_data, res_offsets);
647
65
        }
648
58
    }
649
650
65
    static void to_initcap_utf8(const char* data, uint32_t size, std::string& result) {
651
65
        icu::StringPiece sp;
652
65
        sp.set(data, size);
653
65
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
654
65
        unicode_str.toLower();
655
65
        icu::UnicodeString output_str;
656
65
        bool need_capitalize = true;
657
65
        icu::StringCharacterIterator iter(unicode_str);
658
647
        for (UChar32 ch = iter.first32(); ch != icu::CharacterIterator::DONE; ch = iter.next32()) {
659
582
            if (!u_isalnum(ch)) {
660
105
                need_capitalize = true;
661
477
            } else if (need_capitalize) {
662
87
                ch = u_toupper(ch);
663
87
                need_capitalize = false;
664
87
            }
665
582
            output_str.append(ch);
666
582
        }
667
65
        output_str.toUTF8String(result);
668
65
    }
669
};
670
671
struct NameTrim {
672
    static constexpr auto name = "trim";
673
};
674
struct NameLTrim {
675
    static constexpr auto name = "ltrim";
676
};
677
struct NameRTrim {
678
    static constexpr auto name = "rtrim";
679
};
680
struct NameTrimIn {
681
    static constexpr auto name = "trim_in";
682
};
683
struct NameLTrimIn {
684
    static constexpr auto name = "ltrim_in";
685
};
686
struct NameRTrimIn {
687
    static constexpr auto name = "rtrim_in";
688
};
689
template <bool is_ltrim, bool is_rtrim, bool trim_single>
690
struct TrimUtil {
691
    static Status vector(const ColumnString::Chars& str_data,
692
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
693
301
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
301
        const size_t offset_size = str_offsets.size();
695
301
        res_offsets.resize(offset_size);
696
301
        res_data.reserve(str_data.size());
697
863
        for (size_t i = 0; i < offset_size; ++i) {
698
562
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
562
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
562
            if constexpr (is_ltrim) {
702
345
                str_begin =
703
345
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
345
            }
705
562
            if constexpr (is_rtrim) {
706
395
                str_end =
707
395
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
395
            }
709
710
562
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
562
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
562
        }
714
301
        return Status::OK();
715
301
    }
_ZN5doris8TrimUtilILb1ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
693
58
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
58
        const size_t offset_size = str_offsets.size();
695
58
        res_offsets.resize(offset_size);
696
58
        res_data.reserve(str_data.size());
697
178
        for (size_t i = 0; i < offset_size; ++i) {
698
120
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
120
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
120
            if constexpr (is_ltrim) {
702
120
                str_begin =
703
120
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
120
            }
705
120
            if constexpr (is_rtrim) {
706
120
                str_end =
707
120
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
120
            }
709
710
120
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
120
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
120
        }
714
58
        return Status::OK();
715
58
    }
_ZN5doris8TrimUtilILb1ELb0ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
693
53
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
53
        const size_t offset_size = str_offsets.size();
695
53
        res_offsets.resize(offset_size);
696
53
        res_data.reserve(str_data.size());
697
159
        for (size_t i = 0; i < offset_size; ++i) {
698
106
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
106
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
106
            if constexpr (is_ltrim) {
702
106
                str_begin =
703
106
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
106
            }
705
            if constexpr (is_rtrim) {
706
                str_end =
707
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
            }
709
710
106
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
106
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
106
        }
714
53
        return Status::OK();
715
53
    }
_ZN5doris8TrimUtilILb0ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
693
94
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
94
        const size_t offset_size = str_offsets.size();
695
94
        res_offsets.resize(offset_size);
696
94
        res_data.reserve(str_data.size());
697
266
        for (size_t i = 0; i < offset_size; ++i) {
698
172
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
172
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
            if constexpr (is_ltrim) {
702
                str_begin =
703
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
            }
705
172
            if constexpr (is_rtrim) {
706
172
                str_end =
707
172
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
172
            }
709
710
172
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
172
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
172
        }
714
94
        return Status::OK();
715
94
    }
_ZN5doris8TrimUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
693
24
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
24
        const size_t offset_size = str_offsets.size();
695
24
        res_offsets.resize(offset_size);
696
24
        res_data.reserve(str_data.size());
697
82
        for (size_t i = 0; i < offset_size; ++i) {
698
58
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
58
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
58
            if constexpr (is_ltrim) {
702
58
                str_begin =
703
58
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
58
            }
705
58
            if constexpr (is_rtrim) {
706
58
                str_end =
707
58
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
58
            }
709
710
58
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
58
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
58
        }
714
24
        return Status::OK();
715
24
    }
_ZN5doris8TrimUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
693
27
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
27
        const size_t offset_size = str_offsets.size();
695
27
        res_offsets.resize(offset_size);
696
27
        res_data.reserve(str_data.size());
697
88
        for (size_t i = 0; i < offset_size; ++i) {
698
61
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
61
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
61
            if constexpr (is_ltrim) {
702
61
                str_begin =
703
61
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
61
            }
705
            if constexpr (is_rtrim) {
706
                str_end =
707
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
            }
709
710
61
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
61
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
61
        }
714
27
        return Status::OK();
715
27
    }
_ZN5doris8TrimUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
693
45
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
45
        const size_t offset_size = str_offsets.size();
695
45
        res_offsets.resize(offset_size);
696
45
        res_data.reserve(str_data.size());
697
90
        for (size_t i = 0; i < offset_size; ++i) {
698
45
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
45
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
            if constexpr (is_ltrim) {
702
                str_begin =
703
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
            }
705
45
            if constexpr (is_rtrim) {
706
45
                str_end =
707
45
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
45
            }
709
710
45
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
45
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
45
        }
714
45
        return Status::OK();
715
45
    }
716
};
717
template <bool is_ltrim, bool is_rtrim, bool trim_single>
718
struct TrimInUtil {
719
    static Status vector(const ColumnString::Chars& str_data,
720
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
721
121
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
722
121
        const size_t offset_size = str_offsets.size();
723
121
        res_offsets.resize(offset_size);
724
121
        res_data.reserve(str_data.size());
725
121
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
726
121
                         simd::VStringFunctions::is_ascii(StringRef(
727
76
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
728
729
121
        if (all_ascii) {
730
68
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
731
68
        } else {
732
53
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
733
53
        }
734
121
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
721
43
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
722
43
        const size_t offset_size = str_offsets.size();
723
43
        res_offsets.resize(offset_size);
724
43
        res_data.reserve(str_data.size());
725
43
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
726
43
                         simd::VStringFunctions::is_ascii(StringRef(
727
28
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
728
729
43
        if (all_ascii) {
730
24
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
731
24
        } else {
732
19
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
733
19
        }
734
43
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
721
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
722
36
        const size_t offset_size = str_offsets.size();
723
36
        res_offsets.resize(offset_size);
724
36
        res_data.reserve(str_data.size());
725
36
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
726
36
                         simd::VStringFunctions::is_ascii(StringRef(
727
21
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
728
729
36
        if (all_ascii) {
730
19
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
731
19
        } else {
732
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
733
17
        }
734
36
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
721
42
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
722
42
        const size_t offset_size = str_offsets.size();
723
42
        res_offsets.resize(offset_size);
724
42
        res_data.reserve(str_data.size());
725
42
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
726
42
                         simd::VStringFunctions::is_ascii(StringRef(
727
27
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
728
729
42
        if (all_ascii) {
730
25
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
731
25
        } else {
732
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
733
17
        }
734
42
    }
735
736
private:
737
    static Status impl_vectors_ascii(const ColumnString::Chars& str_data,
738
                                     const ColumnString::Offsets& str_offsets,
739
                                     const StringRef& remove_str, ColumnString::Chars& res_data,
740
68
                                     ColumnString::Offsets& res_offsets) {
741
68
        const size_t offset_size = str_offsets.size();
742
68
        std::bitset<128> char_lookup;
743
68
        const char* remove_begin = remove_str.data;
744
68
        const char* remove_end = remove_str.data + remove_str.size;
745
746
251
        while (remove_begin < remove_end) {
747
183
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
748
183
            remove_begin += 1;
749
183
        }
750
751
136
        for (size_t i = 0; i < offset_size; ++i) {
752
68
            const char* str_begin =
753
68
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
754
68
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
755
68
            const char* left_trim_pos = str_begin;
756
68
            const char* right_trim_pos = str_end;
757
758
68
            if constexpr (is_ltrim) {
759
127
                while (left_trim_pos < str_end) {
760
114
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
761
30
                        break;
762
30
                    }
763
84
                    ++left_trim_pos;
764
84
                }
765
43
            }
766
767
68
            if constexpr (is_rtrim) {
768
114
                while (right_trim_pos > left_trim_pos) {
769
100
                    --right_trim_pos;
770
100
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
771
35
                        ++right_trim_pos;
772
35
                        break;
773
35
                    }
774
100
                }
775
49
            }
776
777
68
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
778
            // The length of the result of the trim function will never exceed the length of the input.
779
68
            res_offsets[i] = (ColumnString::Offset)res_data.size();
780
68
        }
781
782
68
        return Status::OK();
783
68
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
740
24
                                     ColumnString::Offsets& res_offsets) {
741
24
        const size_t offset_size = str_offsets.size();
742
24
        std::bitset<128> char_lookup;
743
24
        const char* remove_begin = remove_str.data;
744
24
        const char* remove_end = remove_str.data + remove_str.size;
745
746
86
        while (remove_begin < remove_end) {
747
62
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
748
62
            remove_begin += 1;
749
62
        }
750
751
48
        for (size_t i = 0; i < offset_size; ++i) {
752
24
            const char* str_begin =
753
24
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
754
24
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
755
24
            const char* left_trim_pos = str_begin;
756
24
            const char* right_trim_pos = str_end;
757
758
24
            if constexpr (is_ltrim) {
759
57
                while (left_trim_pos < str_end) {
760
50
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
761
17
                        break;
762
17
                    }
763
33
                    ++left_trim_pos;
764
33
                }
765
24
            }
766
767
24
            if constexpr (is_rtrim) {
768
39
                while (right_trim_pos > left_trim_pos) {
769
32
                    --right_trim_pos;
770
32
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
771
17
                        ++right_trim_pos;
772
17
                        break;
773
17
                    }
774
32
                }
775
24
            }
776
777
24
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
778
            // The length of the result of the trim function will never exceed the length of the input.
779
24
            res_offsets[i] = (ColumnString::Offset)res_data.size();
780
24
        }
781
782
24
        return Status::OK();
783
24
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
740
19
                                     ColumnString::Offsets& res_offsets) {
741
19
        const size_t offset_size = str_offsets.size();
742
19
        std::bitset<128> char_lookup;
743
19
        const char* remove_begin = remove_str.data;
744
19
        const char* remove_end = remove_str.data + remove_str.size;
745
746
73
        while (remove_begin < remove_end) {
747
54
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
748
54
            remove_begin += 1;
749
54
        }
750
751
38
        for (size_t i = 0; i < offset_size; ++i) {
752
19
            const char* str_begin =
753
19
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
754
19
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
755
19
            const char* left_trim_pos = str_begin;
756
19
            const char* right_trim_pos = str_end;
757
758
19
            if constexpr (is_ltrim) {
759
70
                while (left_trim_pos < str_end) {
760
64
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
761
13
                        break;
762
13
                    }
763
51
                    ++left_trim_pos;
764
51
                }
765
19
            }
766
767
            if constexpr (is_rtrim) {
768
                while (right_trim_pos > left_trim_pos) {
769
                    --right_trim_pos;
770
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
771
                        ++right_trim_pos;
772
                        break;
773
                    }
774
                }
775
            }
776
777
19
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
778
            // The length of the result of the trim function will never exceed the length of the input.
779
19
            res_offsets[i] = (ColumnString::Offset)res_data.size();
780
19
        }
781
782
19
        return Status::OK();
783
19
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
740
25
                                     ColumnString::Offsets& res_offsets) {
741
25
        const size_t offset_size = str_offsets.size();
742
25
        std::bitset<128> char_lookup;
743
25
        const char* remove_begin = remove_str.data;
744
25
        const char* remove_end = remove_str.data + remove_str.size;
745
746
92
        while (remove_begin < remove_end) {
747
67
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
748
67
            remove_begin += 1;
749
67
        }
750
751
50
        for (size_t i = 0; i < offset_size; ++i) {
752
25
            const char* str_begin =
753
25
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
754
25
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
755
25
            const char* left_trim_pos = str_begin;
756
25
            const char* right_trim_pos = str_end;
757
758
            if constexpr (is_ltrim) {
759
                while (left_trim_pos < str_end) {
760
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
761
                        break;
762
                    }
763
                    ++left_trim_pos;
764
                }
765
            }
766
767
25
            if constexpr (is_rtrim) {
768
75
                while (right_trim_pos > left_trim_pos) {
769
68
                    --right_trim_pos;
770
68
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
771
18
                        ++right_trim_pos;
772
18
                        break;
773
18
                    }
774
68
                }
775
25
            }
776
777
25
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
778
            // The length of the result of the trim function will never exceed the length of the input.
779
25
            res_offsets[i] = (ColumnString::Offset)res_data.size();
780
25
        }
781
782
25
        return Status::OK();
783
25
    }
784
785
    static Status impl_vectors_utf8(const ColumnString::Chars& str_data,
786
                                    const ColumnString::Offsets& str_offsets,
787
                                    const StringRef& remove_str, ColumnString::Chars& res_data,
788
53
                                    ColumnString::Offsets& res_offsets) {
789
53
        const size_t offset_size = str_offsets.size();
790
53
        res_offsets.resize(offset_size);
791
53
        res_data.reserve(str_data.size());
792
793
53
        std::unordered_set<std::string_view> char_lookup;
794
53
        const char* remove_begin = remove_str.data;
795
53
        const char* remove_end = remove_str.data + remove_str.size;
796
797
240
        while (remove_begin < remove_end) {
798
187
            size_t byte_len, char_len;
799
187
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
800
187
                    remove_begin, remove_end, 1);
801
187
            char_lookup.insert(std::string_view(remove_begin, byte_len));
802
187
            remove_begin += byte_len;
803
187
        }
804
805
140
        for (size_t i = 0; i < offset_size; ++i) {
806
87
            const char* str_begin =
807
87
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
808
87
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
809
87
            const char* left_trim_pos = str_begin;
810
87
            const char* right_trim_pos = str_end;
811
812
87
            if constexpr (is_ltrim) {
813
81
                while (left_trim_pos < str_end) {
814
73
                    size_t byte_len, char_len;
815
73
                    std::tie(byte_len, char_len) =
816
73
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
817
73
                                                                                   str_end, 1);
818
73
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
819
73
                        char_lookup.end()) {
820
52
                        break;
821
52
                    }
822
21
                    left_trim_pos += byte_len;
823
21
                }
824
60
            }
825
826
87
            if constexpr (is_rtrim) {
827
88
                while (right_trim_pos > left_trim_pos) {
828
80
                    const char* prev_char_pos = right_trim_pos;
829
156
                    do {
830
156
                        --prev_char_pos;
831
156
                    } while ((*prev_char_pos & 0xC0) == 0x80);
832
80
                    size_t byte_len = right_trim_pos - prev_char_pos;
833
80
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
834
80
                        char_lookup.end()) {
835
52
                        break;
836
52
                    }
837
28
                    right_trim_pos = prev_char_pos;
838
28
                }
839
60
            }
840
841
87
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
842
            // The length of the result of the trim function will never exceed the length of the input.
843
87
            res_offsets[i] = (ColumnString::Offset)res_data.size();
844
87
        }
845
53
        return Status::OK();
846
53
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
788
19
                                    ColumnString::Offsets& res_offsets) {
789
19
        const size_t offset_size = str_offsets.size();
790
19
        res_offsets.resize(offset_size);
791
19
        res_data.reserve(str_data.size());
792
793
19
        std::unordered_set<std::string_view> char_lookup;
794
19
        const char* remove_begin = remove_str.data;
795
19
        const char* remove_end = remove_str.data + remove_str.size;
796
797
84
        while (remove_begin < remove_end) {
798
65
            size_t byte_len, char_len;
799
65
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
800
65
                    remove_begin, remove_end, 1);
801
65
            char_lookup.insert(std::string_view(remove_begin, byte_len));
802
65
            remove_begin += byte_len;
803
65
        }
804
805
52
        for (size_t i = 0; i < offset_size; ++i) {
806
33
            const char* str_begin =
807
33
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
808
33
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
809
33
            const char* left_trim_pos = str_begin;
810
33
            const char* right_trim_pos = str_end;
811
812
33
            if constexpr (is_ltrim) {
813
45
                while (left_trim_pos < str_end) {
814
41
                    size_t byte_len, char_len;
815
41
                    std::tie(byte_len, char_len) =
816
41
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
817
41
                                                                                   str_end, 1);
818
41
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
819
41
                        char_lookup.end()) {
820
29
                        break;
821
29
                    }
822
12
                    left_trim_pos += byte_len;
823
12
                }
824
33
            }
825
826
33
            if constexpr (is_rtrim) {
827
48
                while (right_trim_pos > left_trim_pos) {
828
44
                    const char* prev_char_pos = right_trim_pos;
829
90
                    do {
830
90
                        --prev_char_pos;
831
90
                    } while ((*prev_char_pos & 0xC0) == 0x80);
832
44
                    size_t byte_len = right_trim_pos - prev_char_pos;
833
44
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
834
44
                        char_lookup.end()) {
835
29
                        break;
836
29
                    }
837
15
                    right_trim_pos = prev_char_pos;
838
15
                }
839
33
            }
840
841
33
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
842
            // The length of the result of the trim function will never exceed the length of the input.
843
33
            res_offsets[i] = (ColumnString::Offset)res_data.size();
844
33
        }
845
19
        return Status::OK();
846
19
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
788
17
                                    ColumnString::Offsets& res_offsets) {
789
17
        const size_t offset_size = str_offsets.size();
790
17
        res_offsets.resize(offset_size);
791
17
        res_data.reserve(str_data.size());
792
793
17
        std::unordered_set<std::string_view> char_lookup;
794
17
        const char* remove_begin = remove_str.data;
795
17
        const char* remove_end = remove_str.data + remove_str.size;
796
797
78
        while (remove_begin < remove_end) {
798
61
            size_t byte_len, char_len;
799
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
800
61
                    remove_begin, remove_end, 1);
801
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
802
61
            remove_begin += byte_len;
803
61
        }
804
805
44
        for (size_t i = 0; i < offset_size; ++i) {
806
27
            const char* str_begin =
807
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
808
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
809
27
            const char* left_trim_pos = str_begin;
810
27
            const char* right_trim_pos = str_end;
811
812
27
            if constexpr (is_ltrim) {
813
36
                while (left_trim_pos < str_end) {
814
32
                    size_t byte_len, char_len;
815
32
                    std::tie(byte_len, char_len) =
816
32
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
817
32
                                                                                   str_end, 1);
818
32
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
819
32
                        char_lookup.end()) {
820
23
                        break;
821
23
                    }
822
9
                    left_trim_pos += byte_len;
823
9
                }
824
27
            }
825
826
            if constexpr (is_rtrim) {
827
                while (right_trim_pos > left_trim_pos) {
828
                    const char* prev_char_pos = right_trim_pos;
829
                    do {
830
                        --prev_char_pos;
831
                    } while ((*prev_char_pos & 0xC0) == 0x80);
832
                    size_t byte_len = right_trim_pos - prev_char_pos;
833
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
834
                        char_lookup.end()) {
835
                        break;
836
                    }
837
                    right_trim_pos = prev_char_pos;
838
                }
839
            }
840
841
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
842
            // The length of the result of the trim function will never exceed the length of the input.
843
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
844
27
        }
845
17
        return Status::OK();
846
17
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
788
17
                                    ColumnString::Offsets& res_offsets) {
789
17
        const size_t offset_size = str_offsets.size();
790
17
        res_offsets.resize(offset_size);
791
17
        res_data.reserve(str_data.size());
792
793
17
        std::unordered_set<std::string_view> char_lookup;
794
17
        const char* remove_begin = remove_str.data;
795
17
        const char* remove_end = remove_str.data + remove_str.size;
796
797
78
        while (remove_begin < remove_end) {
798
61
            size_t byte_len, char_len;
799
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
800
61
                    remove_begin, remove_end, 1);
801
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
802
61
            remove_begin += byte_len;
803
61
        }
804
805
44
        for (size_t i = 0; i < offset_size; ++i) {
806
27
            const char* str_begin =
807
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
808
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
809
27
            const char* left_trim_pos = str_begin;
810
27
            const char* right_trim_pos = str_end;
811
812
            if constexpr (is_ltrim) {
813
                while (left_trim_pos < str_end) {
814
                    size_t byte_len, char_len;
815
                    std::tie(byte_len, char_len) =
816
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
817
                                                                                   str_end, 1);
818
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
819
                        char_lookup.end()) {
820
                        break;
821
                    }
822
                    left_trim_pos += byte_len;
823
                }
824
            }
825
826
27
            if constexpr (is_rtrim) {
827
40
                while (right_trim_pos > left_trim_pos) {
828
36
                    const char* prev_char_pos = right_trim_pos;
829
66
                    do {
830
66
                        --prev_char_pos;
831
66
                    } while ((*prev_char_pos & 0xC0) == 0x80);
832
36
                    size_t byte_len = right_trim_pos - prev_char_pos;
833
36
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
834
36
                        char_lookup.end()) {
835
23
                        break;
836
23
                    }
837
13
                    right_trim_pos = prev_char_pos;
838
13
                }
839
27
            }
840
841
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
842
            // The length of the result of the trim function will never exceed the length of the input.
843
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
844
27
        }
845
17
        return Status::OK();
846
17
    }
847
};
848
// This is an implementation of a parameter for the Trim function.
849
template <bool is_ltrim, bool is_rtrim, typename Name>
850
struct Trim1Impl {
851
    static constexpr auto name = Name::name;
852
853
152
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
853
44
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
853
35
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
853
40
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
853
8
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
853
12
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
853
13
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
854
855
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
856
140
                          uint32_t result, size_t input_rows_count) {
857
140
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
140
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
140
            auto col_res = ColumnString::create();
860
140
            char blank[] = " ";
861
140
            const StringRef remove_str(blank, 1);
862
140
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
140
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
140
                    col_res->get_offsets())));
865
140
            block.replace_by_position(result, std::move(col_res));
866
140
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
140
        return Status::OK();
872
140
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
856
48
                          uint32_t result, size_t input_rows_count) {
857
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
48
            auto col_res = ColumnString::create();
860
48
            char blank[] = " ";
861
48
            const StringRef remove_str(blank, 1);
862
48
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
48
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
48
                    col_res->get_offsets())));
865
48
            block.replace_by_position(result, std::move(col_res));
866
48
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
48
        return Status::OK();
872
48
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
856
38
                          uint32_t result, size_t input_rows_count) {
857
38
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
38
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
38
            auto col_res = ColumnString::create();
860
38
            char blank[] = " ";
861
38
            const StringRef remove_str(blank, 1);
862
38
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
38
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
38
                    col_res->get_offsets())));
865
38
            block.replace_by_position(result, std::move(col_res));
866
38
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
38
        return Status::OK();
872
38
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
856
42
                          uint32_t result, size_t input_rows_count) {
857
42
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
42
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
42
            auto col_res = ColumnString::create();
860
42
            char blank[] = " ";
861
42
            const StringRef remove_str(blank, 1);
862
42
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
42
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
42
                    col_res->get_offsets())));
865
42
            block.replace_by_position(result, std::move(col_res));
866
42
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
42
        return Status::OK();
872
42
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
856
1
                          uint32_t result, size_t input_rows_count) {
857
1
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
1
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
1
            auto col_res = ColumnString::create();
860
1
            char blank[] = " ";
861
1
            const StringRef remove_str(blank, 1);
862
1
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
1
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
1
                    col_res->get_offsets())));
865
1
            block.replace_by_position(result, std::move(col_res));
866
1
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
1
        return Status::OK();
872
1
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
856
5
                          uint32_t result, size_t input_rows_count) {
857
5
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
5
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
5
            auto col_res = ColumnString::create();
860
5
            char blank[] = " ";
861
5
            const StringRef remove_str(blank, 1);
862
5
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
5
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
5
                    col_res->get_offsets())));
865
5
            block.replace_by_position(result, std::move(col_res));
866
5
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
5
        return Status::OK();
872
5
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
856
6
                          uint32_t result, size_t input_rows_count) {
857
6
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
6
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
6
            auto col_res = ColumnString::create();
860
6
            char blank[] = " ";
861
6
            const StringRef remove_str(blank, 1);
862
6
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
6
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
6
                    col_res->get_offsets())));
865
6
            block.replace_by_position(result, std::move(col_res));
866
6
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
6
        return Status::OK();
872
6
    }
873
};
874
875
// This is an implementation of two parameters for the Trim function.
876
template <bool is_ltrim, bool is_rtrim, typename Name>
877
struct Trim2Impl {
878
    static constexpr auto name = Name::name;
879
880
220
    static DataTypes get_variadic_argument_types() {
881
220
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
220
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
880
19
    static DataTypes get_variadic_argument_types() {
881
19
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
19
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
880
28
    static DataTypes get_variadic_argument_types() {
881
28
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
28
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
880
83
    static DataTypes get_variadic_argument_types() {
881
83
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
83
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
880
26
    static DataTypes get_variadic_argument_types() {
881
26
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
26
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
880
28
    static DataTypes get_variadic_argument_types() {
881
28
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
28
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
880
36
    static DataTypes get_variadic_argument_types() {
881
36
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
36
    }
883
884
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
885
282
                          uint32_t result, size_t input_rows_count) {
886
282
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
282
        const auto& rcol =
888
282
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
282
                        ->get_data_column_ptr();
890
282
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
282
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
282
                auto col_res = ColumnString::create();
893
282
                const auto* remove_str_raw = col_right->get_chars().data();
894
282
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
282
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
282
                if (remove_str.size == 1) {
898
65
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
65
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
65
                            col_res->get_offsets())));
901
217
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
121
                                  std::is_same<Name, NameRTrimIn>::value) {
905
121
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
121
                                col->get_chars(), col->get_offsets(), remove_str,
907
121
                                col_res->get_chars(), col_res->get_offsets())));
908
121
                    } else {
909
96
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
96
                                col->get_chars(), col->get_offsets(), remove_str,
911
96
                                col_res->get_chars(), col_res->get_offsets())));
912
96
                    }
913
217
                }
914
282
                block.replace_by_position(result, std::move(col_res));
915
282
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
282
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
282
        return Status::OK();
927
282
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
26
                          uint32_t result, size_t input_rows_count) {
886
26
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
26
        const auto& rcol =
888
26
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
26
                        ->get_data_column_ptr();
890
26
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
26
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
26
                auto col_res = ColumnString::create();
893
26
                const auto* remove_str_raw = col_right->get_chars().data();
894
26
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
26
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
26
                if (remove_str.size == 1) {
898
2
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
2
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
2
                            col_res->get_offsets())));
901
24
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
                                  std::is_same<Name, NameRTrimIn>::value) {
905
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
                                col->get_chars(), col->get_offsets(), remove_str,
907
                                col_res->get_chars(), col_res->get_offsets())));
908
24
                    } else {
909
24
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
24
                                col->get_chars(), col->get_offsets(), remove_str,
911
24
                                col_res->get_chars(), col_res->get_offsets())));
912
24
                    }
913
24
                }
914
26
                block.replace_by_position(result, std::move(col_res));
915
26
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
26
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
26
        return Status::OK();
927
26
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
32
                          uint32_t result, size_t input_rows_count) {
886
32
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
32
        const auto& rcol =
888
32
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
32
                        ->get_data_column_ptr();
890
32
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
32
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
32
                auto col_res = ColumnString::create();
893
32
                const auto* remove_str_raw = col_right->get_chars().data();
894
32
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
32
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
32
                if (remove_str.size == 1) {
898
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
5
                            col_res->get_offsets())));
901
27
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
                                  std::is_same<Name, NameRTrimIn>::value) {
905
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
                                col->get_chars(), col->get_offsets(), remove_str,
907
                                col_res->get_chars(), col_res->get_offsets())));
908
27
                    } else {
909
27
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
27
                                col->get_chars(), col->get_offsets(), remove_str,
911
27
                                col_res->get_chars(), col_res->get_offsets())));
912
27
                    }
913
27
                }
914
32
                block.replace_by_position(result, std::move(col_res));
915
32
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
32
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
32
        return Status::OK();
927
32
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
85
                          uint32_t result, size_t input_rows_count) {
886
85
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
85
        const auto& rcol =
888
85
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
85
                        ->get_data_column_ptr();
890
85
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
85
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
85
                auto col_res = ColumnString::create();
893
85
                const auto* remove_str_raw = col_right->get_chars().data();
894
85
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
85
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
85
                if (remove_str.size == 1) {
898
40
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
40
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
40
                            col_res->get_offsets())));
901
45
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
                                  std::is_same<Name, NameRTrimIn>::value) {
905
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
                                col->get_chars(), col->get_offsets(), remove_str,
907
                                col_res->get_chars(), col_res->get_offsets())));
908
45
                    } else {
909
45
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
45
                                col->get_chars(), col->get_offsets(), remove_str,
911
45
                                col_res->get_chars(), col_res->get_offsets())));
912
45
                    }
913
45
                }
914
85
                block.replace_by_position(result, std::move(col_res));
915
85
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
85
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
85
        return Status::OK();
927
85
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
50
                          uint32_t result, size_t input_rows_count) {
886
50
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
50
        const auto& rcol =
888
50
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
50
                        ->get_data_column_ptr();
890
50
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
50
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
50
                auto col_res = ColumnString::create();
893
50
                const auto* remove_str_raw = col_right->get_chars().data();
894
50
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
50
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
50
                if (remove_str.size == 1) {
898
7
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
7
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
7
                            col_res->get_offsets())));
901
43
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
43
                                  std::is_same<Name, NameRTrimIn>::value) {
905
43
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
43
                                col->get_chars(), col->get_offsets(), remove_str,
907
43
                                col_res->get_chars(), col_res->get_offsets())));
908
                    } else {
909
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
                                col->get_chars(), col->get_offsets(), remove_str,
911
                                col_res->get_chars(), col_res->get_offsets())));
912
                    }
913
43
                }
914
50
                block.replace_by_position(result, std::move(col_res));
915
50
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
50
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
50
        return Status::OK();
927
50
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
41
                          uint32_t result, size_t input_rows_count) {
886
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
41
        const auto& rcol =
888
41
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
41
                        ->get_data_column_ptr();
890
41
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
41
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
41
                auto col_res = ColumnString::create();
893
41
                const auto* remove_str_raw = col_right->get_chars().data();
894
41
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
41
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
41
                if (remove_str.size == 1) {
898
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
5
                            col_res->get_offsets())));
901
36
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
36
                                  std::is_same<Name, NameRTrimIn>::value) {
905
36
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
36
                                col->get_chars(), col->get_offsets(), remove_str,
907
36
                                col_res->get_chars(), col_res->get_offsets())));
908
                    } else {
909
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
                                col->get_chars(), col->get_offsets(), remove_str,
911
                                col_res->get_chars(), col_res->get_offsets())));
912
                    }
913
36
                }
914
41
                block.replace_by_position(result, std::move(col_res));
915
41
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
41
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
41
        return Status::OK();
927
41
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
48
                          uint32_t result, size_t input_rows_count) {
886
48
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
48
        const auto& rcol =
888
48
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
48
                        ->get_data_column_ptr();
890
48
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
48
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
48
                auto col_res = ColumnString::create();
893
48
                const auto* remove_str_raw = col_right->get_chars().data();
894
48
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
48
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
48
                if (remove_str.size == 1) {
898
6
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
6
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
6
                            col_res->get_offsets())));
901
42
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
42
                                  std::is_same<Name, NameRTrimIn>::value) {
905
42
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
42
                                col->get_chars(), col->get_offsets(), remove_str,
907
42
                                col_res->get_chars(), col_res->get_offsets())));
908
                    } else {
909
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
                                col->get_chars(), col->get_offsets(), remove_str,
911
                                col_res->get_chars(), col_res->get_offsets())));
912
                    }
913
42
                }
914
48
                block.replace_by_position(result, std::move(col_res));
915
48
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
48
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
48
        return Status::OK();
927
48
    }
928
};
929
930
template <typename impl>
931
class FunctionTrim : public IFunction {
932
public:
933
    static constexpr auto name = impl::name;
934
384
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
934
45
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
934
36
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
934
41
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
934
20
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
934
29
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
934
84
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
934
9
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
934
13
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
934
14
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
934
27
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
934
29
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
934
37
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
935
12
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
936
937
288
    size_t get_number_of_arguments() const override {
938
288
        return get_variadic_argument_types_impl().size();
939
288
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
37
    size_t get_number_of_arguments() const override {
938
37
        return get_variadic_argument_types_impl().size();
939
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
28
    size_t get_number_of_arguments() const override {
938
28
        return get_variadic_argument_types_impl().size();
939
28
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
33
    size_t get_number_of_arguments() const override {
938
33
        return get_variadic_argument_types_impl().size();
939
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
12
    size_t get_number_of_arguments() const override {
938
12
        return get_variadic_argument_types_impl().size();
939
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
21
    size_t get_number_of_arguments() const override {
938
21
        return get_variadic_argument_types_impl().size();
939
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
76
    size_t get_number_of_arguments() const override {
938
76
        return get_variadic_argument_types_impl().size();
939
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
937
1
    size_t get_number_of_arguments() const override {
938
1
        return get_variadic_argument_types_impl().size();
939
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
937
5
    size_t get_number_of_arguments() const override {
938
5
        return get_variadic_argument_types_impl().size();
939
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
937
6
    size_t get_number_of_arguments() const override {
938
6
        return get_variadic_argument_types_impl().size();
939
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
937
19
    size_t get_number_of_arguments() const override {
938
19
        return get_variadic_argument_types_impl().size();
939
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
937
21
    size_t get_number_of_arguments() const override {
938
21
        return get_variadic_argument_types_impl().size();
939
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
937
29
    size_t get_number_of_arguments() const override {
938
29
        return get_variadic_argument_types_impl().size();
939
29
    }
940
941
288
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
288
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
288
        return arguments[0];
948
288
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
37
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
37
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
37
        return arguments[0];
948
37
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
28
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
28
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
28
        return arguments[0];
948
28
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
33
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
33
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
33
        return arguments[0];
948
33
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
12
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
12
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
12
        return arguments[0];
948
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
21
        return arguments[0];
948
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
76
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
76
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
76
        return arguments[0];
948
76
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
1
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
1
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
1
        return arguments[0];
948
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
5
        return arguments[0];
948
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
6
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
6
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
6
        return arguments[0];
948
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
19
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
19
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
19
        return arguments[0];
948
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
21
        return arguments[0];
948
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
29
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
29
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
29
        return arguments[0];
948
29
    }
949
    // The second parameter of "trim" is a constant.
950
571
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
85
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
59
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
64
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
37
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
41
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
96
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
1
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
5
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
6
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
67
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
51
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
59
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
951
952
372
    DataTypes get_variadic_argument_types_impl() const override {
953
372
        return impl::get_variadic_argument_types();
954
372
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
44
    DataTypes get_variadic_argument_types_impl() const override {
953
44
        return impl::get_variadic_argument_types();
954
44
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
35
    DataTypes get_variadic_argument_types_impl() const override {
953
35
        return impl::get_variadic_argument_types();
954
35
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
40
    DataTypes get_variadic_argument_types_impl() const override {
953
40
        return impl::get_variadic_argument_types();
954
40
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
19
    DataTypes get_variadic_argument_types_impl() const override {
953
19
        return impl::get_variadic_argument_types();
954
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
28
    DataTypes get_variadic_argument_types_impl() const override {
953
28
        return impl::get_variadic_argument_types();
954
28
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
83
    DataTypes get_variadic_argument_types_impl() const override {
953
83
        return impl::get_variadic_argument_types();
954
83
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
8
    DataTypes get_variadic_argument_types_impl() const override {
953
8
        return impl::get_variadic_argument_types();
954
8
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
12
    DataTypes get_variadic_argument_types_impl() const override {
953
12
        return impl::get_variadic_argument_types();
954
12
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
13
    DataTypes get_variadic_argument_types_impl() const override {
953
13
        return impl::get_variadic_argument_types();
954
13
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
26
    DataTypes get_variadic_argument_types_impl() const override {
953
26
        return impl::get_variadic_argument_types();
954
26
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
28
    DataTypes get_variadic_argument_types_impl() const override {
953
28
        return impl::get_variadic_argument_types();
954
28
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
36
    DataTypes get_variadic_argument_types_impl() const override {
953
36
        return impl::get_variadic_argument_types();
954
36
    }
955
956
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
957
422
                        uint32_t result, size_t input_rows_count) const override {
958
422
        return impl::execute(context, block, arguments, result, input_rows_count);
959
422
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
48
                        uint32_t result, size_t input_rows_count) const override {
958
48
        return impl::execute(context, block, arguments, result, input_rows_count);
959
48
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
38
                        uint32_t result, size_t input_rows_count) const override {
958
38
        return impl::execute(context, block, arguments, result, input_rows_count);
959
38
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
42
                        uint32_t result, size_t input_rows_count) const override {
958
42
        return impl::execute(context, block, arguments, result, input_rows_count);
959
42
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
26
                        uint32_t result, size_t input_rows_count) const override {
958
26
        return impl::execute(context, block, arguments, result, input_rows_count);
959
26
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
32
                        uint32_t result, size_t input_rows_count) const override {
958
32
        return impl::execute(context, block, arguments, result, input_rows_count);
959
32
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
85
                        uint32_t result, size_t input_rows_count) const override {
958
85
        return impl::execute(context, block, arguments, result, input_rows_count);
959
85
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
1
                        uint32_t result, size_t input_rows_count) const override {
958
1
        return impl::execute(context, block, arguments, result, input_rows_count);
959
1
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
5
                        uint32_t result, size_t input_rows_count) const override {
958
5
        return impl::execute(context, block, arguments, result, input_rows_count);
959
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
6
                        uint32_t result, size_t input_rows_count) const override {
958
6
        return impl::execute(context, block, arguments, result, input_rows_count);
959
6
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
50
                        uint32_t result, size_t input_rows_count) const override {
958
50
        return impl::execute(context, block, arguments, result, input_rows_count);
959
50
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
41
                        uint32_t result, size_t input_rows_count) const override {
958
41
        return impl::execute(context, block, arguments, result, input_rows_count);
959
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
48
                        uint32_t result, size_t input_rows_count) const override {
958
48
        return impl::execute(context, block, arguments, result, input_rows_count);
959
48
    }
960
};
961
962
struct UnHexImplEmpty {
963
    static constexpr auto name = "unhex";
964
};
965
966
struct UnHexImplNull {
967
    static constexpr auto name = "unhex_null";
968
};
969
970
template <typename Name>
971
struct UnHexImpl {
972
    static constexpr auto name = Name::name;
973
    using ReturnType = DataTypeString;
974
    using ColumnType = ColumnString;
975
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
976
977
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
978
156
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
979
156
        auto rows_count = offsets.size();
980
156
        dst_offsets.resize(rows_count);
981
982
156
        int64_t total_size = 0;
983
389
        for (size_t i = 0; i < rows_count; i++) {
984
233
            size_t len = offsets[i] - offsets[i - 1];
985
233
            total_size += len / 2;
986
233
        }
987
156
        ColumnString::check_chars_length(total_size, rows_count);
988
156
        dst_data.resize(total_size);
989
156
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
990
156
        size_t offset = 0;
991
992
389
        for (int i = 0; i < rows_count; ++i) {
993
233
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
994
233
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
995
996
233
            if (UNLIKELY(srclen == 0)) {
997
14
                dst_offsets[i] = cast_set<uint32_t>(offset);
998
14
                continue;
999
14
            }
1000
1001
219
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1002
1003
219
            offset += outlen;
1004
219
            dst_offsets[i] = cast_set<uint32_t>(offset);
1005
219
        }
1006
156
        dst_data.pop_back(total_size - offset);
1007
156
        return Status::OK();
1008
156
    }
1009
1010
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1011
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1012
33
                         ColumnUInt8::Container* null_map_data) {
1013
33
        auto rows_count = offsets.size();
1014
33
        dst_offsets.resize(rows_count);
1015
1016
33
        int64_t total_size = 0;
1017
84
        for (size_t i = 0; i < rows_count; i++) {
1018
51
            size_t len = offsets[i] - offsets[i - 1];
1019
51
            total_size += len / 2;
1020
51
        }
1021
33
        ColumnString::check_chars_length(total_size, rows_count);
1022
33
        dst_data.resize(total_size);
1023
33
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1024
33
        size_t offset = 0;
1025
1026
84
        for (int i = 0; i < rows_count; ++i) {
1027
51
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1028
51
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1029
1030
51
            if (UNLIKELY(srclen == 0)) {
1031
7
                (*null_map_data)[i] = 1;
1032
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1033
7
                continue;
1034
7
            }
1035
1036
44
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1037
1038
44
            if (outlen == 0) {
1039
13
                (*null_map_data)[i] = 1;
1040
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1041
13
                continue;
1042
13
            }
1043
1044
31
            offset += outlen;
1045
31
            dst_offsets[i] = cast_set<uint32_t>(offset);
1046
31
        }
1047
33
        dst_data.pop_back(total_size - offset);
1048
33
        return Status::OK();
1049
33
    }
1050
};
1051
1052
struct NameStringSpace {
1053
    static constexpr auto name = "space";
1054
};
1055
1056
struct StringSpace {
1057
    using ReturnType = DataTypeString;
1058
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_INT;
1059
    using Type = Int32;
1060
    using ReturnColumnType = ColumnString;
1061
1062
    static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data,
1063
10
                         ColumnString::Offsets& res_offsets) {
1064
10
        res_offsets.resize(data.size());
1065
10
        size_t input_size = res_offsets.size();
1066
10
        int64_t total_size = 0;
1067
34
        for (size_t i = 0; i < input_size; ++i) {
1068
24
            if (data[i] > 0) {
1069
14
                total_size += data[i];
1070
14
            }
1071
24
        }
1072
10
        ColumnString::check_chars_length(total_size, input_size);
1073
10
        res_data.reserve(total_size);
1074
1075
34
        for (size_t i = 0; i < input_size; ++i) {
1076
24
            if (data[i] > 0) [[likely]] {
1077
14
                res_data.resize_fill(res_data.size() + data[i], ' ');
1078
14
                cast_set(res_offsets[i], res_data.size());
1079
14
            } else {
1080
10
                StringOP::push_empty_string(i, res_data, res_offsets);
1081
10
            }
1082
24
        }
1083
10
        return Status::OK();
1084
10
    }
1085
};
1086
1087
struct ToBase64Impl {
1088
    static constexpr auto name = "to_base64";
1089
    using ReturnType = DataTypeString;
1090
    using ColumnType = ColumnString;
1091
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1092
1093
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1094
117
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1095
117
        auto rows_count = offsets.size();
1096
117
        dst_offsets.resize(rows_count);
1097
1098
117
        size_t total_size = 0;
1099
276
        for (size_t i = 0; i < rows_count; i++) {
1100
159
            size_t len = offsets[i] - offsets[i - 1];
1101
159
            total_size += 4 * ((len + 2) / 3);
1102
159
        }
1103
117
        ColumnString::check_chars_length(total_size, rows_count);
1104
117
        dst_data.resize(total_size);
1105
117
        auto* dst_data_ptr = dst_data.data();
1106
117
        size_t offset = 0;
1107
1108
276
        for (int i = 0; i < rows_count; ++i) {
1109
159
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1110
159
            size_t srclen = offsets[i] - offsets[i - 1];
1111
1112
159
            if (UNLIKELY(srclen == 0)) {
1113
8
                dst_offsets[i] = cast_set<uint32_t>(offset);
1114
8
                continue;
1115
8
            }
1116
1117
151
            auto outlen = doris::base64_encode((const unsigned char*)source, srclen,
1118
151
                                               (unsigned char*)(dst_data_ptr + offset));
1119
1120
151
            offset += outlen;
1121
151
            dst_offsets[i] = cast_set<uint32_t>(offset);
1122
151
        }
1123
117
        dst_data.pop_back(total_size - offset);
1124
117
        return Status::OK();
1125
117
    }
1126
};
1127
1128
struct FromBase64Impl {
1129
    static constexpr auto name = "from_base64";
1130
    using ReturnType = DataTypeString;
1131
    using ColumnType = ColumnString;
1132
1133
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1134
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1135
119
                         NullMap& null_map) {
1136
119
        auto rows_count = offsets.size();
1137
119
        dst_offsets.resize(rows_count);
1138
1139
119
        size_t total_size = 0;
1140
297
        for (size_t i = 0; i < rows_count; i++) {
1141
178
            auto len = offsets[i] - offsets[i - 1];
1142
178
            total_size += len / 4 * 3;
1143
178
        }
1144
119
        ColumnString::check_chars_length(total_size, rows_count);
1145
119
        dst_data.resize(total_size);
1146
119
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1147
119
        size_t offset = 0;
1148
1149
297
        for (int i = 0; i < rows_count; ++i) {
1150
178
            if (UNLIKELY(null_map[i])) {
1151
0
                null_map[i] = 1;
1152
0
                dst_offsets[i] = cast_set<uint32_t>(offset);
1153
0
                continue;
1154
0
            }
1155
1156
178
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1157
178
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1158
1159
178
            if (UNLIKELY(srclen == 0)) {
1160
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1161
7
                continue;
1162
7
            }
1163
1164
171
            auto outlen = base64_decode(source, srclen, dst_data_ptr + offset);
1165
1166
171
            if (outlen < 0) {
1167
64
                null_map[i] = 1;
1168
64
                dst_offsets[i] = cast_set<uint32_t>(offset);
1169
107
            } else {
1170
107
                offset += outlen;
1171
107
                dst_offsets[i] = cast_set<uint32_t>(offset);
1172
107
            }
1173
171
        }
1174
119
        dst_data.pop_back(total_size - offset);
1175
119
        return Status::OK();
1176
119
    }
1177
};
1178
1179
struct StringAppendTrailingCharIfAbsent {
1180
    static constexpr auto name = "append_trailing_char_if_absent";
1181
    using Chars = ColumnString::Chars;
1182
    using Offsets = ColumnString::Offsets;
1183
    using ReturnType = DataTypeString;
1184
    using ColumnType = ColumnString;
1185
1186
58
    static bool str_end_with(const StringRef& str, const StringRef& end) {
1187
58
        if (str.size < end.size) {
1188
11
            return false;
1189
11
        }
1190
        // The end_with method of StringRef needs to ensure that the size of end is less than or equal to the size of str.
1191
47
        return str.end_with(end);
1192
58
    }
1193
1194
    static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1195
                              const Chars& rdata, const Offsets& roffsets, Chars& res_data,
1196
56
                              Offsets& res_offsets, NullMap& null_map_data) {
1197
56
        DCHECK_EQ(loffsets.size(), roffsets.size());
1198
56
        size_t input_rows_count = loffsets.size();
1199
56
        res_offsets.resize(input_rows_count);
1200
56
        fmt::memory_buffer buffer;
1201
1202
158
        for (size_t i = 0; i < input_rows_count; ++i) {
1203
102
            buffer.clear();
1204
1205
102
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1206
102
                                       loffsets[i] - loffsets[i - 1]);
1207
102
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1208
102
                                       roffsets[i] - roffsets[i - 1]);
1209
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1210
102
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1211
102
                    rstr.begin(), rstr.end(), 2);
1212
1213
102
            if (char_len != 1) {
1214
66
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1215
66
                continue;
1216
66
            }
1217
36
            if (str_end_with(lstr, rstr)) {
1218
9
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1219
9
                continue;
1220
9
            }
1221
1222
27
            buffer.append(lstr.begin(), lstr.end());
1223
27
            buffer.append(rstr.begin(), rstr.end());
1224
27
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1225
27
                                        res_offsets);
1226
27
        }
1227
56
    }
1228
    static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1229
                              const StringRef& rstr, Chars& res_data, Offsets& res_offsets,
1230
9
                              NullMap& null_map_data) {
1231
9
        size_t input_rows_count = loffsets.size();
1232
9
        res_offsets.resize(input_rows_count);
1233
9
        fmt::memory_buffer buffer;
1234
        // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1235
9
        auto [byte_len, char_len] =
1236
9
                simd::VStringFunctions::iterate_utf8_with_limit_length(rstr.begin(), rstr.end(), 2);
1237
9
        if (char_len != 1) {
1238
4
            for (size_t i = 0; i < input_rows_count; ++i) {
1239
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1240
2
            }
1241
2
            return;
1242
2
        }
1243
1244
23
        for (size_t i = 0; i < input_rows_count; ++i) {
1245
16
            buffer.clear();
1246
16
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1247
16
                                       loffsets[i] - loffsets[i - 1]);
1248
1249
16
            if (str_end_with(lstr, rstr)) {
1250
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1251
2
                continue;
1252
2
            }
1253
1254
14
            buffer.append(lstr.begin(), lstr.end());
1255
14
            buffer.append(rstr.begin(), rstr.end());
1256
14
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1257
14
                                        res_offsets);
1258
14
        }
1259
7
    }
1260
    static void scalar_vector(FunctionContext* context, const StringRef& lstr, const Chars& rdata,
1261
                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
1262
8
                              NullMap& null_map_data) {
1263
8
        size_t input_rows_count = roffsets.size();
1264
8
        res_offsets.resize(input_rows_count);
1265
8
        fmt::memory_buffer buffer;
1266
1267
16
        for (size_t i = 0; i < input_rows_count; ++i) {
1268
8
            buffer.clear();
1269
1270
8
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1271
8
                                       roffsets[i] - roffsets[i - 1]);
1272
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1273
8
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1274
8
                    rstr.begin(), rstr.end(), 2);
1275
1276
8
            if (char_len != 1) {
1277
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1278
2
                continue;
1279
2
            }
1280
6
            if (str_end_with(lstr, rstr)) {
1281
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1282
2
                continue;
1283
2
            }
1284
1285
4
            buffer.append(lstr.begin(), lstr.end());
1286
4
            buffer.append(rstr.begin(), rstr.end());
1287
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1288
4
                                        res_offsets);
1289
4
        }
1290
8
    }
1291
};
1292
1293
struct StringLPad {
1294
    static constexpr auto name = "lpad";
1295
    static constexpr auto is_lpad = true;
1296
};
1297
1298
// Compile-time traits for the "rpad" variant; used as the template argument of
// FunctionStringPad (see the FunctionStringRPad alias).
struct StringRPad {
    // Name string exposed by this variant.
    static constexpr const char* name = "rpad";
    // False for this variant; the "lpad" counterpart sets it to true.
    static constexpr bool is_lpad = false;
};
1302
1303
template <typename LeftDataType, typename RightDataType>
1304
using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;
1305
1306
template <typename LeftDataType, typename RightDataType>
1307
using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;
1308
1309
template <typename LeftDataType, typename RightDataType>
1310
using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
1311
1312
// ready for regist function
1313
using FunctionStringParseDataSize = FunctionUnaryToType<ParseDataSize, NameParseDataSize>;
1314
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
1315
using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLength>;
1316
using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
1317
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
1318
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
1319
using FunctionStringStartsWith =
1320
        FunctionBinaryToType<DataTypeString, DataTypeString, StringStartsWithImpl, NameStartsWith>;
1321
using FunctionStringEndsWith =
1322
        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
1323
using FunctionStringInstr =
1324
        FunctionBinaryToType<DataTypeString, DataTypeString, StringInStrImpl, NameInstr>;
1325
using FunctionStringLocate =
1326
        FunctionBinaryToType<DataTypeString, DataTypeString, StringLocateImpl, NameLocate>;
1327
using FunctionStringFindInSet =
1328
        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;
1329
1330
using FunctionQuote = FunctionStringToString<NameQuoteImpl, NameQuote>;
1331
1332
using FunctionToLower = FunctionStringToString<TransferImpl<NameToLower>, NameToLower>;
1333
1334
using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameToUpper>;
1335
1336
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
1337
1338
using FunctionUnHex = FunctionStringEncode<UnHexImpl<UnHexImplEmpty>, false>;
1339
using FunctionUnHexNullable = FunctionStringEncode<UnHexImpl<UnHexImplNull>, true>;
1340
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>;
1341
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
1342
1343
using FunctionStringAppendTrailingCharIfAbsent =
1344
        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;
1345
1346
using FunctionStringLPad = FunctionStringPad<StringLPad>;
1347
using FunctionStringRPad = FunctionStringPad<StringRPad>;
1348
1349
using FunctionMakeSet = FunctionNeedsToHandleNull<MakeSetImpl, PrimitiveType::TYPE_STRING>;
1350
1351
7
void register_function_string(SimpleFunctionFactory& factory) {
1352
7
    factory.register_function<FunctionStringParseDataSize>();
1353
7
    factory.register_function<FunctionStringASCII>();
1354
7
    factory.register_function<FunctionStringLength>();
1355
7
    factory.register_function<FunctionCrc32>();
1356
7
    factory.register_function<FunctionStringUTF8Length>();
1357
7
    factory.register_function<FunctionStringSpace>();
1358
7
    factory.register_function<FunctionStringStartsWith>();
1359
7
    factory.register_function<FunctionStringEndsWith>();
1360
7
    factory.register_function<FunctionStringInstr>();
1361
7
    factory.register_function<FunctionStringFindInSet>();
1362
7
    factory.register_function<FunctionStringLocate>();
1363
7
    factory.register_function<FunctionStringLocatePos>();
1364
7
    factory.register_function<FunctionQuote>();
1365
7
    factory.register_function<FunctionAutoPartitionName>();
1366
7
    factory.register_function<FunctionReverseCommon>();
1367
7
    factory.register_function<FunctionUnHex>();
1368
7
    factory.register_function<FunctionUnHexNullable>();
1369
7
    factory.register_function<FunctionToLower>();
1370
7
    factory.register_function<FunctionToUpper>();
1371
7
    factory.register_function<FunctionToInitcap>();
1372
7
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrim>>>();
1373
7
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrim>>>();
1374
7
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrim>>>();
1375
7
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrim>>>();
1376
7
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrim>>>();
1377
7
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrim>>>();
1378
7
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrimIn>>>();
1379
7
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrimIn>>>();
1380
7
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrimIn>>>();
1381
7
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrimIn>>>();
1382
7
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrimIn>>>();
1383
7
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrimIn>>>();
1384
7
    factory.register_function<FunctionConvertTo>();
1385
7
    factory.register_function<FunctionSubstring<Substr3Impl>>();
1386
7
    factory.register_function<FunctionSubstring<Substr2Impl>>();
1387
7
    factory.register_function<FunctionLeft>();
1388
7
    factory.register_function<FunctionRight>();
1389
7
    factory.register_function<FunctionNullOrEmpty>();
1390
7
    factory.register_function<FunctionNotNullOrEmpty>();
1391
7
    factory.register_function<FunctionStringConcat>();
1392
7
    factory.register_function<FunctionIntToChar>();
1393
7
    factory.register_function<FunctionStringElt>();
1394
7
    factory.register_function<FunctionStringConcatWs>();
1395
7
    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
1396
7
    factory.register_function<FunctionStringRepeat>();
1397
7
    factory.register_function<FunctionStringLPad>();
1398
7
    factory.register_function<FunctionStringRPad>();
1399
7
    factory.register_function<FunctionToBase64>();
1400
7
    factory.register_function<FunctionFromBase64>();
1401
7
    factory.register_function<FunctionSplitPart>();
1402
7
    factory.register_function<FunctionSplitByString>();
1403
7
    factory.register_function<FunctionCountSubString<FunctionCountSubStringType::TWO_ARGUMENTS>>();
1404
7
    factory.register_function<
1405
7
            FunctionCountSubString<FunctionCountSubStringType::THREE_ARGUMENTS>>();
1406
7
    factory.register_function<FunctionSubstringIndex>();
1407
7
    factory.register_function<FunctionExtractURLParameter>();
1408
7
    factory.register_function<FunctionStringParseUrl>();
1409
7
    factory.register_function<FunctionUrlDecode>();
1410
7
    factory.register_function<FunctionUrlEncode>();
1411
7
    factory.register_function<FunctionRandomBytes>();
1412
7
    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
1413
7
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
1414
7
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
1415
7
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMALV2>>>();
1416
7
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL32>>>();
1417
7
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL64>>>();
1418
7
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL128I>>>();
1419
7
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL256>>>();
1420
7
    factory.register_function<FunctionStringFormatRound<FormatRoundDoubleImpl>>();
1421
7
    factory.register_function<FunctionStringFormatRound<FormatRoundInt64Impl>>();
1422
7
    factory.register_function<FunctionStringFormatRound<FormatRoundInt128Impl>>();
1423
7
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMALV2>>>();
1424
7
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL32>>>();
1425
7
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL64>>>();
1426
7
    factory.register_function<
1427
7
            FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL128I>>>();
1428
7
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL256>>>();
1429
7
    factory.register_function<FunctionStringDigestMulti<SM3Sum>>();
1430
7
    factory.register_function<FunctionStringDigestMulti<MD5Sum>>();
1431
7
    factory.register_function<FunctionStringDigestSHA1>();
1432
7
    factory.register_function<FunctionStringDigestSHA2>();
1433
7
    factory.register_function<FunctionReplace<ReplaceImpl, true>>();
1434
7
    factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
1435
7
    factory.register_function<FunctionTranslate>();
1436
7
    factory.register_function<FunctionMask>();
1437
7
    factory.register_function<FunctionMaskPartial<true>>();
1438
7
    factory.register_function<FunctionMaskPartial<false>>();
1439
7
    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
1440
7
    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
1441
7
    factory.register_function<FunctionOverlay>();
1442
7
    factory.register_function<FunctionStrcmp>();
1443
7
    factory.register_function<FunctionNgramSearch>();
1444
7
    factory.register_function<FunctionXPathString>();
1445
7
    factory.register_function<FunctionCrc32Internal>();
1446
7
    factory.register_function<FunctionMakeSet>();
1447
7
    factory.register_function<FunctionExportSet>();
1448
7
    factory.register_function<FunctionUnicodeNormalize>();
1449
1450
7
    factory.register_alias(FunctionLeft::name, "strleft");
1451
7
    factory.register_alias(FunctionRight::name, "strright");
1452
7
    factory.register_alias(SubstringUtil::name, "substr");
1453
7
    factory.register_alias(SubstringUtil::name, "mid");
1454
7
    factory.register_alias(FunctionToLower::name, "lcase");
1455
7
    factory.register_alias(FunctionToUpper::name, "ucase");
1456
7
    factory.register_alias(FunctionStringDigestMulti<MD5Sum>::name, "md5");
1457
7
    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
1458
7
    factory.register_alias(FunctionStringDigestMulti<SM3Sum>::name, "sm3");
1459
7
    factory.register_alias(FunctionStringDigestSHA1::name, "sha");
1460
7
    factory.register_alias(FunctionStringLocatePos::name, "position");
1461
7
    factory.register_alias(FunctionStringLength::name, "octet_length");
1462
7
    factory.register_alias(FunctionOverlay::name, "insert");
1463
7
}
1464
1465
} // namespace doris