Coverage Report

Created: 2026-03-16 15:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
be/src/exprs/function/function_string.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
#include "exprs/function/function_string.h"
19
20
#include <ctype.h>
21
#include <math.h>
22
#include <re2/stringpiece.h>
23
#include <unicode/schriter.h>
24
#include <unicode/uchar.h>
25
#include <unicode/unistr.h>
26
#include <unicode/ustream.h>
27
28
#include <bitset>
29
#include <cstddef>
30
#include <cstdint>
31
#include <string_view>
32
33
#include "common/cast_set.h"
34
#include "common/status.h"
35
#include "core/column/column.h"
36
#include "core/column/column_string.h"
37
#include "core/pod_array_fwd.h"
38
#include "core/string_ref.h"
39
#include "exprs/function/function_reverse.h"
40
#include "exprs/function/function_string_to_string.h"
41
#include "exprs/function/function_totype.h"
42
#include "exprs/function/simple_function_factory.h"
43
#include "exprs/function/string_hex_util.h"
44
#include "util/string_search.hpp"
45
#include "util/url_coding.h"
46
47
namespace doris {
48
#include "common/compile_check_begin.h"
49
struct NameStringASCII {
50
    static constexpr auto name = "ascii";
51
};
52
53
struct StringASCII {
54
    using ReturnType = DataTypeInt32;
55
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
56
    using Type = String;
57
    using ReturnColumnType = ColumnInt32;
58
59
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
60
28
                         PaddedPODArray<Int32>& res) {
61
28
        auto size = offsets.size();
62
28
        res.resize(size);
63
100
        for (int i = 0; i < size; ++i) {
64
72
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
65
72
            res[i] = (offsets[i] == offsets[i - 1]) ? 0 : static_cast<uint8_t>(raw_str[0]);
66
72
        }
67
28
        return Status::OK();
68
28
    }
69
};
70
71
struct NameParseDataSize {
72
    static constexpr auto name = "parse_data_size";
73
};
74
75
static const std::map<std::string_view, Int128> UNITS = {
76
        {"B", static_cast<Int128>(1)},        {"kB", static_cast<Int128>(1) << 10},
77
        {"MB", static_cast<Int128>(1) << 20}, {"GB", static_cast<Int128>(1) << 30},
78
        {"TB", static_cast<Int128>(1) << 40}, {"PB", static_cast<Int128>(1) << 50},
79
        {"EB", static_cast<Int128>(1) << 60}, {"ZB", static_cast<Int128>(1) << 70},
80
        {"YB", static_cast<Int128>(1) << 80}};
81
82
struct ParseDataSize {
83
    using ReturnType = DataTypeInt128;
84
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
85
    using Type = String;
86
    using ReturnColumnType = ColumnInt128;
87
88
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
89
48
                         PaddedPODArray<Int128>& res) {
90
48
        auto size = offsets.size();
91
48
        res.resize(size);
92
100
        for (int i = 0; i < size; ++i) {
93
52
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
94
52
            int str_size = offsets[i] - offsets[i - 1];
95
52
            res[i] = parse_data_size(std::string_view(raw_str, str_size));
96
52
        }
97
48
        return Status::OK();
98
48
    }
99
100
52
    static Int128 parse_data_size(const std::string_view& dataSize) {
101
52
        int digit_length = 0;
102
216
        for (char c : dataSize) {
103
216
            if (isdigit(c) || c == '.') {
104
166
                digit_length++;
105
166
            } else {
106
50
                break;
107
50
            }
108
216
        }
109
110
52
        if (digit_length == 0) {
111
4
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
112
4
                                   "Invalid Input argument \"{}\" of function parse_data_size",
113
4
                                   dataSize);
114
4
        }
115
        // 123.45MB--->123.45 : MB
116
48
        double value = 0.0;
117
48
        try {
118
48
            value = std::stod(std::string(dataSize.substr(0, digit_length)));
119
48
        } catch (const std::exception& e) {
120
0
            throw doris::Exception(
121
0
                    ErrorCode::INVALID_ARGUMENT,
122
0
                    "Invalid Input argument \"{}\" of function parse_data_size, error: {}",
123
0
                    dataSize, e.what());
124
0
        }
125
48
        auto unit = dataSize.substr(digit_length);
126
48
        auto it = UNITS.find(unit);
127
48
        if (it != UNITS.end()) {
128
45
            return static_cast<__int128>(static_cast<long double>(it->second) * value);
129
45
        } else {
130
3
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
131
3
                                   "Invalid Input argument \"{}\" of function parse_data_size",
132
3
                                   dataSize);
133
3
        }
134
48
    }
135
};
136
137
struct NameQuote {
138
    static constexpr auto name = "quote";
139
};
140
141
struct NameQuoteImpl {
142
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
143
12
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
144
12
        size_t offset_size = offsets.size();
145
12
        ColumnString::Offset pos = 0;
146
12
        res_offsets.resize(offset_size);
147
12
        res_data.resize(data.size() + offset_size * 2);
148
35
        for (int i = 0; i < offset_size; i++) {
149
23
            const unsigned char* raw_str = &data[offsets[i - 1]];
150
23
            ColumnString::Offset size = offsets[i] - offsets[i - 1];
151
23
            res_data[pos] = '\'';
152
23
            std::memcpy(res_data.data() + pos + 1, raw_str, size);
153
23
            res_data[pos + size + 1] = '\'';
154
23
            pos += size + 2;
155
23
            res_offsets[i] = pos;
156
23
        }
157
12
        return Status::OK();
158
12
    }
159
};
160
161
struct NameStringLength {
162
    static constexpr auto name = "length";
163
};
164
165
struct StringLengthImpl {
166
    using ReturnType = DataTypeInt32;
167
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
168
    using Type = String;
169
    using ReturnColumnType = ColumnInt32;
170
171
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
172
6.85k
                         PaddedPODArray<Int32>& res) {
173
6.85k
        auto size = offsets.size();
174
6.85k
        res.resize(size);
175
4.38M
        for (int i = 0; i < size; ++i) {
176
4.37M
            int str_size = offsets[i] - offsets[i - 1];
177
4.37M
            res[i] = str_size;
178
4.37M
        }
179
6.85k
        return Status::OK();
180
6.85k
    }
181
};
182
183
struct NameCrc32 {
184
    static constexpr auto name = "crc32";
185
};
186
187
struct Crc32Impl {
188
    using ReturnType = DataTypeInt64;
189
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
190
    using Type = String;
191
    using ReturnColumnType = ColumnInt64;
192
193
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
194
3
                         PaddedPODArray<Int64>& res) {
195
3
        auto size = offsets.size();
196
3
        res.resize(size);
197
6
        for (int i = 0; i < size; ++i) {
198
3
            res[i] = crc32_z(0L, (const unsigned char*)data.data() + offsets[i - 1],
199
3
                             offsets[i] - offsets[i - 1]);
200
3
        }
201
3
        return Status::OK();
202
3
    }
203
};
204
205
struct NameStringUtf8Length {
206
    static constexpr auto name = "char_length";
207
};
208
209
struct StringUtf8LengthImpl {
210
    using ReturnType = DataTypeInt32;
211
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
212
    using Type = String;
213
    using ReturnColumnType = ColumnInt32;
214
215
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
216
27
                         PaddedPODArray<Int32>& res) {
217
27
        auto size = offsets.size();
218
27
        res.resize(size);
219
98
        for (int i = 0; i < size; ++i) {
220
71
            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
221
71
            int str_size = offsets[i] - offsets[i - 1];
222
71
            res[i] = simd::VStringFunctions::get_char_len(raw_str, str_size);
223
71
        }
224
27
        return Status::OK();
225
27
    }
226
};
227
228
struct NameStartsWith {
229
    static constexpr auto name = "starts_with";
230
};
231
232
struct StartsWithOp {
233
    using ResultDataType = DataTypeUInt8;
234
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
235
236
137
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
237
137
        res = strl.starts_with(strr);
238
137
    }
239
};
240
241
struct NameEndsWith {
242
    static constexpr auto name = "ends_with";
243
};
244
245
struct EndsWithOp {
246
    using ResultDataType = DataTypeUInt8;
247
    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
248
249
112
    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
250
112
        res = strl.ends_with(strr);
251
112
    }
252
};
253
254
struct NameFindInSet {
255
    static constexpr auto name = "find_in_set";
256
};
257
258
struct FindInSetOp {
259
    using ResultDataType = DataTypeInt32;
260
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
261
140
    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
262
623
        for (const auto& c : strl) {
263
623
            if (c == ',') {
264
17
                res = 0;
265
17
                return;
266
17
            }
267
623
        }
268
269
123
        int32_t token_index = 1;
270
123
        int32_t start = 0;
271
123
        int32_t end;
272
273
196
        do {
274
196
            end = start;
275
            // Position end.
276
853
            while (end < strr.length() && strr[end] != ',') {
277
657
                ++end;
278
657
            }
279
280
196
            if (strl == std::string_view {strr.data() + start, (size_t)end - start}) {
281
82
                res = token_index;
282
82
                return;
283
82
            }
284
285
            // Re-position start and end past ','
286
114
            start = end + 1;
287
114
            ++token_index;
288
114
        } while (start < strr.length());
289
41
        res = 0;
290
41
    }
291
};
292
293
struct NameInstr {
294
    static constexpr auto name = "instr";
295
};
296
297
// LeftDataType and RightDataType are DataTypeString
298
template <typename LeftDataType, typename RightDataType>
299
struct StringInStrImpl {
300
    using ResultDataType = DataTypeInt32;
301
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
302
303
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
304
72
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
305
72
        StringRef lstr_ref(ldata.data, ldata.size);
306
307
72
        auto size = roffsets.size();
308
72
        res.resize(size);
309
144
        for (int i = 0; i < size; ++i) {
310
72
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
311
72
            int r_str_size = roffsets[i] - roffsets[i - 1];
312
313
72
            StringRef rstr_ref(r_raw_str, r_str_size);
314
315
72
            res[i] = execute(lstr_ref, rstr_ref);
316
72
        }
317
318
72
        return Status::OK();
319
72
    }
320
321
    static Status vector_scalar(const ColumnString::Chars& ldata,
322
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
323
88
                                ResultPaddedPODArray& res) {
324
88
        auto size = loffsets.size();
325
88
        res.resize(size);
326
327
88
        if (rdata.size == 0) {
328
12
            std::fill(res.begin(), res.end(), 1);
329
12
            return Status::OK();
330
12
        }
331
332
76
        const UInt8* begin = ldata.data();
333
76
        const UInt8* end = begin + ldata.size();
334
76
        const UInt8* pos = begin;
335
336
        /// Current index in the array of strings.
337
76
        size_t i = 0;
338
76
        std::fill(res.begin(), res.end(), 0);
339
340
76
        StringRef rstr_ref(rdata.data, rdata.size);
341
76
        StringSearch search(&rstr_ref);
342
343
92
        while (pos < end) {
344
            // search return matched substring start offset
345
66
            pos = (UInt8*)search.search((char*)pos, end - pos);
346
66
            if (pos >= end) {
347
50
                break;
348
50
            }
349
350
            /// Determine which index it refers to.
351
            /// begin + value_offsets[i] is the start offset of string at i+1
352
16
            while (begin + loffsets[i] < pos) {
353
0
                ++i;
354
0
            }
355
356
            /// We check that the entry does not pass through the boundaries of strings.
357
16
            if (pos + rdata.size <= begin + loffsets[i]) {
358
16
                int loc = (int)(pos - begin) - loffsets[i - 1];
359
16
                int l_str_size = loffsets[i] - loffsets[i - 1];
360
16
                auto len = std::min(l_str_size, loc);
361
16
                loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len);
362
16
                res[i] = loc + 1;
363
16
            }
364
365
            // move to next string offset
366
16
            pos = begin + loffsets[i];
367
16
            ++i;
368
16
        }
369
370
76
        return Status::OK();
371
88
    }
372
373
    static Status vector_vector(const ColumnString::Chars& ldata,
374
                                const ColumnString::Offsets& loffsets,
375
                                const ColumnString::Chars& rdata,
376
138
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
377
138
        DCHECK_EQ(loffsets.size(), roffsets.size());
378
379
138
        auto size = loffsets.size();
380
138
        res.resize(size);
381
523
        for (int i = 0; i < size; ++i) {
382
385
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
383
385
            int l_str_size = loffsets[i] - loffsets[i - 1];
384
385
            StringRef lstr_ref(l_raw_str, l_str_size);
385
386
385
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
387
385
            int r_str_size = roffsets[i] - roffsets[i - 1];
388
385
            StringRef rstr_ref(r_raw_str, r_str_size);
389
390
385
            res[i] = execute(lstr_ref, rstr_ref);
391
385
        }
392
393
138
        return Status::OK();
394
138
    }
395
396
457
    static int execute(const StringRef& strl, const StringRef& strr) {
397
457
        if (strr.size == 0) {
398
62
            return 1;
399
62
        }
400
401
395
        StringSearch search(&strr);
402
        // Hive returns positions starting from 1.
403
395
        int loc = search.search(&strl);
404
395
        if (loc > 0) {
405
12
            int len = std::min(loc, (int)strl.size);
406
12
            loc = simd::VStringFunctions::get_char_len(strl.data, len);
407
12
        }
408
409
395
        return loc + 1;
410
457
    }
411
};
412
413
// the same impl as instr
414
struct NameLocate {
415
    static constexpr auto name = "locate";
416
};
417
418
// LeftDataType and RightDataType are DataTypeString
419
template <typename LeftDataType, typename RightDataType>
420
struct StringLocateImpl {
421
    using ResultDataType = DataTypeInt32;
422
    using ResultPaddedPODArray = PaddedPODArray<Int32>;
423
424
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
425
38
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
426
38
        return StringInStrImpl<LeftDataType, RightDataType>::vector_scalar(rdata, roffsets, ldata,
427
38
                                                                           res);
428
38
    }
429
430
    static Status vector_scalar(const ColumnString::Chars& ldata,
431
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
432
36
                                ResultPaddedPODArray& res) {
433
36
        return StringInStrImpl<LeftDataType, RightDataType>::scalar_vector(rdata, ldata, loffsets,
434
36
                                                                           res);
435
36
    }
436
437
    static Status vector_vector(const ColumnString::Chars& ldata,
438
                                const ColumnString::Offsets& loffsets,
439
                                const ColumnString::Chars& rdata,
440
85
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
441
85
        return StringInStrImpl<LeftDataType, RightDataType>::vector_vector(rdata, roffsets, ldata,
442
85
                                                                           loffsets, res);
443
85
    }
444
};
445
446
// LeftDataType and RightDataType are DataTypeString
447
template <typename LeftDataType, typename RightDataType, typename OP>
448
struct StringFunctionImpl {
449
    using ResultDataType = typename OP::ResultDataType;
450
    using ResultPaddedPODArray = typename OP::ResultPaddedPODArray;
451
452
    static Status vector_vector(const ColumnString::Chars& ldata,
453
                                const ColumnString::Offsets& loffsets,
454
                                const ColumnString::Chars& rdata,
455
155
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
456
155
        DCHECK_EQ(loffsets.size(), roffsets.size());
457
458
155
        auto size = loffsets.size();
459
155
        res.resize(size);
460
460
        for (int i = 0; i < size; ++i) {
461
305
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
462
305
            int l_str_size = loffsets[i] - loffsets[i - 1];
463
464
305
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
465
305
            int r_str_size = roffsets[i] - roffsets[i - 1];
466
467
305
            std::string_view lview(l_raw_str, l_str_size);
468
305
            std::string_view rview(r_raw_str, r_str_size);
469
470
305
            OP::execute(lview, rview, res[i]);
471
305
        }
472
155
        return Status::OK();
473
155
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
455
90
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
456
90
        DCHECK_EQ(loffsets.size(), roffsets.size());
457
458
90
        auto size = loffsets.size();
459
90
        res.resize(size);
460
219
        for (int i = 0; i < size; ++i) {
461
129
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
462
129
            int l_str_size = loffsets[i] - loffsets[i - 1];
463
464
129
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
465
129
            int r_str_size = roffsets[i] - roffsets[i - 1];
466
467
129
            std::string_view lview(l_raw_str, l_str_size);
468
129
            std::string_view rview(r_raw_str, r_str_size);
469
470
129
            OP::execute(lview, rview, res[i]);
471
129
        }
472
90
        return Status::OK();
473
90
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RS8_
Line
Count
Source
455
31
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
456
31
        DCHECK_EQ(loffsets.size(), roffsets.size());
457
458
31
        auto size = loffsets.size();
459
31
        res.resize(size);
460
115
        for (int i = 0; i < size; ++i) {
461
84
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
462
84
            int l_str_size = loffsets[i] - loffsets[i - 1];
463
464
84
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
465
84
            int r_str_size = roffsets[i] - roffsets[i - 1];
466
467
84
            std::string_view lview(l_raw_str, l_str_size);
468
84
            std::string_view rview(r_raw_str, r_str_size);
469
470
84
            OP::execute(lview, rview, res[i]);
471
84
        }
472
31
        return Status::OK();
473
31
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEESA_SD_RNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
455
34
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
456
34
        DCHECK_EQ(loffsets.size(), roffsets.size());
457
458
34
        auto size = loffsets.size();
459
34
        res.resize(size);
460
126
        for (int i = 0; i < size; ++i) {
461
92
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
462
92
            int l_str_size = loffsets[i] - loffsets[i - 1];
463
464
92
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
465
92
            int r_str_size = roffsets[i] - roffsets[i - 1];
466
467
92
            std::string_view lview(l_raw_str, l_str_size);
468
92
            std::string_view rview(r_raw_str, r_str_size);
469
470
92
            OP::execute(lview, rview, res[i]);
471
92
        }
472
34
        return Status::OK();
473
34
    }
474
    static Status vector_scalar(const ColumnString::Chars& ldata,
475
                                const ColumnString::Offsets& loffsets, const StringRef& rdata,
476
34
                                ResultPaddedPODArray& res) {
477
34
        auto size = loffsets.size();
478
34
        res.resize(size);
479
34
        std::string_view rview(rdata.data, rdata.size);
480
68
        for (int i = 0; i < size; ++i) {
481
34
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
482
34
            int l_str_size = loffsets[i] - loffsets[i - 1];
483
34
            std::string_view lview(l_raw_str, l_str_size);
484
485
34
            OP::execute(lview, rview, res[i]);
486
34
        }
487
34
        return Status::OK();
488
34
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
476
4
                                ResultPaddedPODArray& res) {
477
4
        auto size = loffsets.size();
478
4
        res.resize(size);
479
4
        std::string_view rview(rdata.data, rdata.size);
480
8
        for (int i = 0; i < size; ++i) {
481
4
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
482
4
            int l_str_size = loffsets[i] - loffsets[i - 1];
483
4
            std::string_view lview(l_raw_str, l_str_size);
484
485
4
            OP::execute(lview, rview, res[i]);
486
4
        }
487
4
        return Status::OK();
488
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERS8_
Line
Count
Source
476
14
                                ResultPaddedPODArray& res) {
477
14
        auto size = loffsets.size();
478
14
        res.resize(size);
479
14
        std::string_view rview(rdata.data, rdata.size);
480
28
        for (int i = 0; i < size; ++i) {
481
14
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
482
14
            int l_str_size = loffsets[i] - loffsets[i - 1];
483
14
            std::string_view lview(l_raw_str, l_str_size);
484
485
14
            OP::execute(lview, rview, res[i]);
486
14
        }
487
14
        return Status::OK();
488
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13vector_scalarERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS4_IjLm4096ES7_Lm16ELm15EEERKNS_9StringRefERNS4_IiLm4096ES7_Lm16ELm15EEE
Line
Count
Source
476
16
                                ResultPaddedPODArray& res) {
477
16
        auto size = loffsets.size();
478
16
        res.resize(size);
479
16
        std::string_view rview(rdata.data, rdata.size);
480
32
        for (int i = 0; i < size; ++i) {
481
16
            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
482
16
            int l_str_size = loffsets[i] - loffsets[i - 1];
483
16
            std::string_view lview(l_raw_str, l_str_size);
484
485
16
            OP::execute(lview, rview, res[i]);
486
16
        }
487
16
        return Status::OK();
488
16
    }
489
    static Status scalar_vector(const StringRef& ldata, const ColumnString::Chars& rdata,
490
44
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
491
44
        auto size = roffsets.size();
492
44
        res.resize(size);
493
44
        std::string_view lview(ldata.data, ldata.size);
494
94
        for (int i = 0; i < size; ++i) {
495
50
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
496
50
            int r_str_size = roffsets[i] - roffsets[i - 1];
497
50
            std::string_view rview(r_raw_str, r_str_size);
498
499
50
            OP::execute(lview, rview, res[i]);
500
50
        }
501
44
        return Status::OK();
502
44
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_12StartsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
490
4
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
491
4
        auto size = roffsets.size();
492
4
        res.resize(size);
493
4
        std::string_view lview(ldata.data, ldata.size);
494
8
        for (int i = 0; i < size; ++i) {
495
4
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
496
4
            int r_str_size = roffsets[i] - roffsets[i - 1];
497
4
            std::string_view rview(r_raw_str, r_str_size);
498
499
4
            OP::execute(lview, rview, res[i]);
500
4
        }
501
4
        return Status::OK();
502
4
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_10EndsWithOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERSB_
Line
Count
Source
490
14
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
491
14
        auto size = roffsets.size();
492
14
        res.resize(size);
493
14
        std::string_view lview(ldata.data, ldata.size);
494
28
        for (int i = 0; i < size; ++i) {
495
14
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
496
14
            int r_str_size = roffsets[i] - roffsets[i - 1];
497
14
            std::string_view rview(r_raw_str, r_str_size);
498
499
14
            OP::execute(lview, rview, res[i]);
500
14
        }
501
14
        return Status::OK();
502
14
    }
_ZN5doris18StringFunctionImplINS_14DataTypeStringES1_NS_11FindInSetOpEE13scalar_vectorERKNS_9StringRefERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS7_IjLm4096ESA_Lm16ELm15EEERNS7_IiLm4096ESA_Lm16ELm15EEE
Line
Count
Source
490
26
                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
491
26
        auto size = roffsets.size();
492
26
        res.resize(size);
493
26
        std::string_view lview(ldata.data, ldata.size);
494
58
        for (int i = 0; i < size; ++i) {
495
32
            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
496
32
            int r_str_size = roffsets[i] - roffsets[i - 1];
497
32
            std::string_view rview(r_raw_str, r_str_size);
498
499
32
            OP::execute(lview, rview, res[i]);
500
32
        }
501
26
        return Status::OK();
502
26
    }
503
};
504
505
struct NameToLower {
506
    static constexpr auto name = "lower";
507
};
508
509
struct NameToUpper {
510
    static constexpr auto name = "upper";
511
};
512
513
template <typename OpName>
514
struct TransferImpl {
515
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
516
358
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
517
358
        size_t offset_size = offsets.size();
518
358
        if (UNLIKELY(!offset_size)) {
519
0
            return Status::OK();
520
0
        }
521
522
358
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
523
358
        res_offsets.resize(offset_size);
524
358
        if (is_ascii) {
525
312
            memcpy_small_allow_read_write_overflow15(
526
312
                    res_offsets.data(), offsets.data(),
527
312
                    offset_size * sizeof(ColumnString::Offsets::value_type));
528
529
312
            size_t data_length = data.size();
530
312
            res_data.resize(data_length);
531
312
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
532
84
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
533
228
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
534
228
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
535
228
            }
536
312
        } else {
537
46
            execute_utf8(data, offsets, res_data, res_offsets);
538
46
        }
539
540
358
        return Status::OK();
541
358
    }
_ZN5doris12TransferImplINS_11NameToLowerEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
516
242
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
517
242
        size_t offset_size = offsets.size();
518
242
        if (UNLIKELY(!offset_size)) {
519
0
            return Status::OK();
520
0
        }
521
522
242
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
523
242
        res_offsets.resize(offset_size);
524
242
        if (is_ascii) {
525
228
            memcpy_small_allow_read_write_overflow15(
526
228
                    res_offsets.data(), offsets.data(),
527
228
                    offset_size * sizeof(ColumnString::Offsets::value_type));
528
529
228
            size_t data_length = data.size();
530
228
            res_data.resize(data_length);
531
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
532
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
533
228
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
534
228
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
535
228
            }
536
228
        } else {
537
14
            execute_utf8(data, offsets, res_data, res_offsets);
538
14
        }
539
540
242
        return Status::OK();
541
242
    }
_ZN5doris12TransferImplINS_11NameToUpperEE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
516
116
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
517
116
        size_t offset_size = offsets.size();
518
116
        if (UNLIKELY(!offset_size)) {
519
0
            return Status::OK();
520
0
        }
521
522
116
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
523
116
        res_offsets.resize(offset_size);
524
116
        if (is_ascii) {
525
84
            memcpy_small_allow_read_write_overflow15(
526
84
                    res_offsets.data(), offsets.data(),
527
84
                    offset_size * sizeof(ColumnString::Offsets::value_type));
528
529
84
            size_t data_length = data.size();
530
84
            res_data.resize(data_length);
531
84
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
532
84
                simd::VStringFunctions::to_upper(data.data(), data_length, res_data.data());
533
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
534
                simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
535
            }
536
84
        } else {
537
32
            execute_utf8(data, offsets, res_data, res_offsets);
538
32
        }
539
540
116
        return Status::OK();
541
116
    }
542
543
    static void execute_utf8(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
544
46
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
545
46
        std::string result;
546
170
        for (int64_t i = 0; i < offsets.size(); ++i) {
547
124
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
548
124
            uint32_t size = offsets[i] - offsets[i - 1];
549
550
124
            result.clear();
551
124
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
552
84
                to_upper_utf8(begin, size, result);
553
84
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
554
40
                to_lower_utf8(begin, size, result);
555
40
            }
556
124
            StringOP::push_value_string(result, i, res_data, res_offsets);
557
124
        }
558
46
    }
_ZN5doris12TransferImplINS_11NameToLowerEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
544
14
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
545
14
        std::string result;
546
54
        for (int64_t i = 0; i < offsets.size(); ++i) {
547
40
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
548
40
            uint32_t size = offsets[i] - offsets[i - 1];
549
550
40
            result.clear();
551
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
552
                to_upper_utf8(begin, size, result);
553
40
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
554
40
                to_lower_utf8(begin, size, result);
555
40
            }
556
40
            StringOP::push_value_string(result, i, res_data, res_offsets);
557
40
        }
558
14
    }
_ZN5doris12TransferImplINS_11NameToUpperEE12execute_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS3_IjLm4096ES6_Lm16ELm15EEERS7_RSA_
Line
Count
Source
544
32
                             ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
545
32
        std::string result;
546
116
        for (int64_t i = 0; i < offsets.size(); ++i) {
547
84
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
548
84
            uint32_t size = offsets[i] - offsets[i - 1];
549
550
84
            result.clear();
551
84
            if constexpr (std::is_same_v<OpName, NameToUpper>) {
552
84
                to_upper_utf8(begin, size, result);
553
            } else if constexpr (std::is_same_v<OpName, NameToLower>) {
554
                to_lower_utf8(begin, size, result);
555
            }
556
84
            StringOP::push_value_string(result, i, res_data, res_offsets);
557
84
        }
558
32
    }
559
560
84
    static void to_upper_utf8(const char* data, uint32_t size, std::string& result) {
561
84
        icu::StringPiece sp;
562
84
        sp.set(data, size);
563
84
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
564
84
        unicode_str.toUpper();
565
84
        unicode_str.toUTF8String(result);
566
84
    }
567
568
40
    static void to_lower_utf8(const char* data, uint32_t size, std::string& result) {
569
40
        icu::StringPiece sp;
570
40
        sp.set(data, size);
571
40
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
572
40
        unicode_str.toLower();
573
40
        unicode_str.toUTF8String(result);
574
40
    }
575
};
576
577
// Capitalize first letter
578
struct NameToInitcap {
579
    static constexpr auto name = "initcap";
580
};
581
582
struct InitcapImpl {
583
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
584
19
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
585
19
        res_offsets.resize(offsets.size());
586
587
19
        const bool is_ascii = simd::VStringFunctions::is_ascii({data.data(), data.size()});
588
19
        if (is_ascii) {
589
16
            impl_vectors_ascii(data, offsets, res_data, res_offsets);
590
16
        } else {
591
3
            impl_vectors_utf8(data, offsets, res_data, res_offsets);
592
3
        }
593
19
        return Status::OK();
594
19
    }
595
596
    static void impl_vectors_ascii(const ColumnString::Chars& data,
597
                                   const ColumnString::Offsets& offsets,
598
                                   ColumnString::Chars& res_data,
599
16
                                   ColumnString::Offsets& res_offsets) {
600
16
        size_t offset_size = offsets.size();
601
16
        memcpy_small_allow_read_write_overflow15(
602
16
                res_offsets.data(), offsets.data(),
603
16
                offset_size * sizeof(ColumnString::Offsets::value_type));
604
605
16
        size_t data_length = data.size();
606
16
        res_data.resize(data_length);
607
16
        simd::VStringFunctions::to_lower(data.data(), data_length, res_data.data());
608
609
16
        bool need_capitalize = true;
610
49
        for (size_t offset_index = 0, start_index = 0; offset_index < offset_size; ++offset_index) {
611
33
            auto end_index = res_offsets[offset_index];
612
33
            need_capitalize = true;
613
614
345
            for (size_t i = start_index; i < end_index; ++i) {
615
312
                if (!::isalnum(res_data[i])) {
616
28
                    need_capitalize = true;
617
284
                } else if (need_capitalize) {
618
                    /*
619
                    https://en.cppreference.com/w/cpp/string/byte/toupper
620
                    Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. 
621
                    To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char:
622
                    char my_toupper(char ch)
623
                    {
624
                        return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
625
                    }
626
                    */
627
50
                    res_data[i] = static_cast<unsigned char>(::toupper(res_data[i]));
628
50
                    need_capitalize = false;
629
50
                }
630
312
            }
631
632
33
            start_index = end_index;
633
33
        }
634
16
    }
635
636
    static void impl_vectors_utf8(const ColumnString::Chars& data,
637
                                  const ColumnString::Offsets& offsets,
638
                                  ColumnString::Chars& res_data,
639
3
                                  ColumnString::Offsets& res_offsets) {
640
3
        std::string result;
641
13
        for (int64_t i = 0; i < offsets.size(); ++i) {
642
10
            const char* begin = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
643
10
            uint32_t size = offsets[i] - offsets[i - 1];
644
10
            result.clear();
645
10
            to_initcap_utf8(begin, size, result);
646
10
            StringOP::push_value_string(result, i, res_data, res_offsets);
647
10
        }
648
3
    }
649
650
10
    static void to_initcap_utf8(const char* data, uint32_t size, std::string& result) {
651
10
        icu::StringPiece sp;
652
10
        sp.set(data, size);
653
10
        icu::UnicodeString unicode_str = icu::UnicodeString::fromUTF8(sp);
654
10
        unicode_str.toLower();
655
10
        icu::UnicodeString output_str;
656
10
        bool need_capitalize = true;
657
10
        icu::StringCharacterIterator iter(unicode_str);
658
196
        for (UChar32 ch = iter.first32(); ch != icu::CharacterIterator::DONE; ch = iter.next32()) {
659
186
            if (!u_isalnum(ch)) {
660
59
                need_capitalize = true;
661
127
            } else if (need_capitalize) {
662
30
                ch = u_toupper(ch);
663
30
                need_capitalize = false;
664
30
            }
665
186
            output_str.append(ch);
666
186
        }
667
10
        output_str.toUTF8String(result);
668
10
    }
669
};
670
671
struct NameTrim {
672
    static constexpr auto name = "trim";
673
};
674
struct NameLTrim {
675
    static constexpr auto name = "ltrim";
676
};
677
struct NameRTrim {
678
    static constexpr auto name = "rtrim";
679
};
680
struct NameTrimIn {
681
    static constexpr auto name = "trim_in";
682
};
683
struct NameLTrimIn {
684
    static constexpr auto name = "ltrim_in";
685
};
686
struct NameRTrimIn {
687
    static constexpr auto name = "rtrim_in";
688
};
689
template <bool is_ltrim, bool is_rtrim, bool trim_single>
690
struct TrimUtil {
691
    static Status vector(const ColumnString::Chars& str_data,
692
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
693
175
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
175
        const size_t offset_size = str_offsets.size();
695
175
        res_offsets.resize(offset_size);
696
175
        res_data.reserve(str_data.size());
697
602
        for (size_t i = 0; i < offset_size; ++i) {
698
427
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
427
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
427
            if constexpr (is_ltrim) {
702
296
                str_begin =
703
296
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
296
            }
705
427
            if constexpr (is_rtrim) {
706
296
                str_end =
707
296
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
296
            }
709
710
427
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
427
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
427
        }
714
175
        return Status::OK();
715
175
    }
_ZN5doris8TrimUtilILb1ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
693
50
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
50
        const size_t offset_size = str_offsets.size();
695
50
        res_offsets.resize(offset_size);
696
50
        res_data.reserve(str_data.size());
697
162
        for (size_t i = 0; i < offset_size; ++i) {
698
112
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
112
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
112
            if constexpr (is_ltrim) {
702
112
                str_begin =
703
112
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
112
            }
705
112
            if constexpr (is_rtrim) {
706
112
                str_end =
707
112
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
112
            }
709
710
112
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
112
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
112
        }
714
50
        return Status::OK();
715
50
    }
_ZN5doris8TrimUtilILb1ELb0ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
693
37
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
37
        const size_t offset_size = str_offsets.size();
695
37
        res_offsets.resize(offset_size);
696
37
        res_data.reserve(str_data.size());
697
118
        for (size_t i = 0; i < offset_size; ++i) {
698
81
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
81
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
81
            if constexpr (is_ltrim) {
702
81
                str_begin =
703
81
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
81
            }
705
            if constexpr (is_rtrim) {
706
                str_end =
707
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
            }
709
710
81
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
81
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
81
        }
714
37
        return Status::OK();
715
37
    }
_ZN5doris8TrimUtilILb0ELb1ELb1EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
693
53
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
53
        const size_t offset_size = str_offsets.size();
695
53
        res_offsets.resize(offset_size);
696
53
        res_data.reserve(str_data.size());
697
184
        for (size_t i = 0; i < offset_size; ++i) {
698
131
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
131
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
            if constexpr (is_ltrim) {
702
                str_begin =
703
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
            }
705
131
            if constexpr (is_rtrim) {
706
131
                str_end =
707
131
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
131
            }
709
710
131
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
131
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
131
        }
714
53
        return Status::OK();
715
53
    }
_ZN5doris8TrimUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
693
19
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
19
        const size_t offset_size = str_offsets.size();
695
19
        res_offsets.resize(offset_size);
696
19
        res_data.reserve(str_data.size());
697
72
        for (size_t i = 0; i < offset_size; ++i) {
698
53
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
53
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
53
            if constexpr (is_ltrim) {
702
53
                str_begin =
703
53
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
53
            }
705
53
            if constexpr (is_rtrim) {
706
53
                str_end =
707
53
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
53
            }
709
710
53
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
53
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
53
        }
714
19
        return Status::OK();
715
19
    }
_ZN5doris8TrimUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
693
16
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
694
16
        const size_t offset_size = str_offsets.size();
695
16
        res_offsets.resize(offset_size);
696
16
        res_data.reserve(str_data.size());
697
66
        for (size_t i = 0; i < offset_size; ++i) {
698
50
            const auto* str_begin = str_data.data() + str_offsets[i - 1];
699
50
            const auto* str_end = str_data.data() + str_offsets[i];
700
701
50
            if constexpr (is_ltrim) {
702
50
                str_begin =
703
50
                        simd::VStringFunctions::ltrim<trim_single>(str_begin, str_end, remove_str);
704
50
            }
705
            if constexpr (is_rtrim) {
706
                str_end =
707
                        simd::VStringFunctions::rtrim<trim_single>(str_begin, str_end, remove_str);
708
            }
709
710
50
            res_data.insert_assume_reserved(str_begin, str_end);
711
            // The length of the result of the trim function will never exceed the length of the input.
712
50
            res_offsets[i] = (ColumnString::Offset)res_data.size();
713
50
        }
714
16
        return Status::OK();
715
16
    }
Unexecuted instantiation: _ZN5doris8TrimUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
716
};
717
template <bool is_ltrim, bool is_rtrim, bool trim_single>
718
struct TrimInUtil {
719
    static Status vector(const ColumnString::Chars& str_data,
720
                         const ColumnString::Offsets& str_offsets, const StringRef& remove_str,
721
112
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
722
112
        const size_t offset_size = str_offsets.size();
723
112
        res_offsets.resize(offset_size);
724
112
        res_data.reserve(str_data.size());
725
112
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
726
112
                         simd::VStringFunctions::is_ascii(StringRef(
727
67
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
728
729
112
        if (all_ascii) {
730
59
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
731
59
        } else {
732
53
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
733
53
        }
734
112
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
721
39
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
722
39
        const size_t offset_size = str_offsets.size();
723
39
        res_offsets.resize(offset_size);
724
39
        res_data.reserve(str_data.size());
725
39
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
726
39
                         simd::VStringFunctions::is_ascii(StringRef(
727
24
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
728
729
39
        if (all_ascii) {
730
20
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
731
20
        } else {
732
19
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
733
19
        }
734
39
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
721
36
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
722
36
        const size_t offset_size = str_offsets.size();
723
36
        res_offsets.resize(offset_size);
724
36
        res_data.reserve(str_data.size());
725
36
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
726
36
                         simd::VStringFunctions::is_ascii(StringRef(
727
21
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
728
729
36
        if (all_ascii) {
730
19
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
731
19
        } else {
732
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
733
17
        }
734
36
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE6vectorERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
721
37
                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
722
37
        const size_t offset_size = str_offsets.size();
723
37
        res_offsets.resize(offset_size);
724
37
        res_data.reserve(str_data.size());
725
37
        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
726
37
                         simd::VStringFunctions::is_ascii(StringRef(
727
22
                                 reinterpret_cast<const char*>(str_data.data()), str_data.size()));
728
729
37
        if (all_ascii) {
730
20
            return impl_vectors_ascii(str_data, str_offsets, remove_str, res_data, res_offsets);
731
20
        } else {
732
17
            return impl_vectors_utf8(str_data, str_offsets, remove_str, res_data, res_offsets);
733
17
        }
734
37
    }
735
736
private:
737
    static Status impl_vectors_ascii(const ColumnString::Chars& str_data,
738
                                     const ColumnString::Offsets& str_offsets,
739
                                     const StringRef& remove_str, ColumnString::Chars& res_data,
740
59
                                     ColumnString::Offsets& res_offsets) {
741
59
        const size_t offset_size = str_offsets.size();
742
59
        std::bitset<128> char_lookup;
743
59
        const char* remove_begin = remove_str.data;
744
59
        const char* remove_end = remove_str.data + remove_str.size;
745
746
224
        while (remove_begin < remove_end) {
747
165
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
748
165
            remove_begin += 1;
749
165
        }
750
751
118
        for (size_t i = 0; i < offset_size; ++i) {
752
59
            const char* str_begin =
753
59
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
754
59
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
755
59
            const char* left_trim_pos = str_begin;
756
59
            const char* right_trim_pos = str_end;
757
758
59
            if constexpr (is_ltrim) {
759
116
                while (left_trim_pos < str_end) {
760
104
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
761
27
                        break;
762
27
                    }
763
77
                    ++left_trim_pos;
764
77
                }
765
39
            }
766
767
59
            if constexpr (is_rtrim) {
768
90
                while (right_trim_pos > left_trim_pos) {
769
78
                    --right_trim_pos;
770
78
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
771
28
                        ++right_trim_pos;
772
28
                        break;
773
28
                    }
774
78
                }
775
40
            }
776
777
59
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
778
            // The length of the result of the trim function will never exceed the length of the input.
779
59
            res_offsets[i] = (ColumnString::Offset)res_data.size();
780
59
        }
781
782
59
        return Status::OK();
783
59
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
740
20
                                     ColumnString::Offsets& res_offsets) {
741
20
        const size_t offset_size = str_offsets.size();
742
20
        std::bitset<128> char_lookup;
743
20
        const char* remove_begin = remove_str.data;
744
20
        const char* remove_end = remove_str.data + remove_str.size;
745
746
74
        while (remove_begin < remove_end) {
747
54
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
748
54
            remove_begin += 1;
749
54
        }
750
751
40
        for (size_t i = 0; i < offset_size; ++i) {
752
20
            const char* str_begin =
753
20
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
754
20
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
755
20
            const char* left_trim_pos = str_begin;
756
20
            const char* right_trim_pos = str_end;
757
758
20
            if constexpr (is_ltrim) {
759
46
                while (left_trim_pos < str_end) {
760
40
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
761
14
                        break;
762
14
                    }
763
26
                    ++left_trim_pos;
764
26
                }
765
20
            }
766
767
20
            if constexpr (is_rtrim) {
768
29
                while (right_trim_pos > left_trim_pos) {
769
23
                    --right_trim_pos;
770
23
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
771
14
                        ++right_trim_pos;
772
14
                        break;
773
14
                    }
774
23
                }
775
20
            }
776
777
20
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
778
            // The length of the result of the trim function will never exceed the length of the input.
779
20
            res_offsets[i] = (ColumnString::Offset)res_data.size();
780
20
        }
781
782
20
        return Status::OK();
783
20
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
740
19
                                     ColumnString::Offsets& res_offsets) {
741
19
        const size_t offset_size = str_offsets.size();
742
19
        std::bitset<128> char_lookup;
743
19
        const char* remove_begin = remove_str.data;
744
19
        const char* remove_end = remove_str.data + remove_str.size;
745
746
73
        while (remove_begin < remove_end) {
747
54
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
748
54
            remove_begin += 1;
749
54
        }
750
751
38
        for (size_t i = 0; i < offset_size; ++i) {
752
19
            const char* str_begin =
753
19
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
754
19
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
755
19
            const char* left_trim_pos = str_begin;
756
19
            const char* right_trim_pos = str_end;
757
758
19
            if constexpr (is_ltrim) {
759
70
                while (left_trim_pos < str_end) {
760
64
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
761
13
                        break;
762
13
                    }
763
51
                    ++left_trim_pos;
764
51
                }
765
19
            }
766
767
            if constexpr (is_rtrim) {
768
                while (right_trim_pos > left_trim_pos) {
769
                    --right_trim_pos;
770
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
771
                        ++right_trim_pos;
772
                        break;
773
                    }
774
                }
775
            }
776
777
19
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
778
            // The length of the result of the trim function will never exceed the length of the input.
779
19
            res_offsets[i] = (ColumnString::Offset)res_data.size();
780
19
        }
781
782
19
        return Status::OK();
783
19
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE18impl_vectors_asciiERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
740
20
                                     ColumnString::Offsets& res_offsets) {
741
20
        const size_t offset_size = str_offsets.size();
742
20
        std::bitset<128> char_lookup;
743
20
        const char* remove_begin = remove_str.data;
744
20
        const char* remove_end = remove_str.data + remove_str.size;
745
746
77
        while (remove_begin < remove_end) {
747
57
            char_lookup.set(static_cast<unsigned char>(*remove_begin));
748
57
            remove_begin += 1;
749
57
        }
750
751
40
        for (size_t i = 0; i < offset_size; ++i) {
752
20
            const char* str_begin =
753
20
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
754
20
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
755
20
            const char* left_trim_pos = str_begin;
756
20
            const char* right_trim_pos = str_end;
757
758
            if constexpr (is_ltrim) {
759
                while (left_trim_pos < str_end) {
760
                    if (!char_lookup.test(static_cast<unsigned char>(*left_trim_pos))) {
761
                        break;
762
                    }
763
                    ++left_trim_pos;
764
                }
765
            }
766
767
20
            if constexpr (is_rtrim) {
768
61
                while (right_trim_pos > left_trim_pos) {
769
55
                    --right_trim_pos;
770
55
                    if (!char_lookup.test(static_cast<unsigned char>(*right_trim_pos))) {
771
14
                        ++right_trim_pos;
772
14
                        break;
773
14
                    }
774
55
                }
775
20
            }
776
777
20
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
778
            // The length of the result of the trim function will never exceed the length of the input.
779
20
            res_offsets[i] = (ColumnString::Offset)res_data.size();
780
20
        }
781
782
20
        return Status::OK();
783
20
    }
784
785
    static Status impl_vectors_utf8(const ColumnString::Chars& str_data,
786
                                    const ColumnString::Offsets& str_offsets,
787
                                    const StringRef& remove_str, ColumnString::Chars& res_data,
788
53
                                    ColumnString::Offsets& res_offsets) {
789
53
        const size_t offset_size = str_offsets.size();
790
53
        res_offsets.resize(offset_size);
791
53
        res_data.reserve(str_data.size());
792
793
53
        std::unordered_set<std::string_view> char_lookup;
794
53
        const char* remove_begin = remove_str.data;
795
53
        const char* remove_end = remove_str.data + remove_str.size;
796
797
240
        while (remove_begin < remove_end) {
798
187
            size_t byte_len, char_len;
799
187
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
800
187
                    remove_begin, remove_end, 1);
801
187
            char_lookup.insert(std::string_view(remove_begin, byte_len));
802
187
            remove_begin += byte_len;
803
187
        }
804
805
140
        for (size_t i = 0; i < offset_size; ++i) {
806
87
            const char* str_begin =
807
87
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
808
87
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
809
87
            const char* left_trim_pos = str_begin;
810
87
            const char* right_trim_pos = str_end;
811
812
87
            if constexpr (is_ltrim) {
813
81
                while (left_trim_pos < str_end) {
814
73
                    size_t byte_len, char_len;
815
73
                    std::tie(byte_len, char_len) =
816
73
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
817
73
                                                                                   str_end, 1);
818
73
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
819
73
                        char_lookup.end()) {
820
52
                        break;
821
52
                    }
822
21
                    left_trim_pos += byte_len;
823
21
                }
824
60
            }
825
826
87
            if constexpr (is_rtrim) {
827
88
                while (right_trim_pos > left_trim_pos) {
828
80
                    const char* prev_char_pos = right_trim_pos;
829
156
                    do {
830
156
                        --prev_char_pos;
831
156
                    } while ((*prev_char_pos & 0xC0) == 0x80);
832
80
                    size_t byte_len = right_trim_pos - prev_char_pos;
833
80
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
834
80
                        char_lookup.end()) {
835
52
                        break;
836
52
                    }
837
28
                    right_trim_pos = prev_char_pos;
838
28
                }
839
60
            }
840
841
87
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
842
            // The length of the result of the trim function will never exceed the length of the input.
843
87
            res_offsets[i] = (ColumnString::Offset)res_data.size();
844
87
        }
845
53
        return Status::OK();
846
53
    }
_ZN5doris10TrimInUtilILb1ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
788
19
                                    ColumnString::Offsets& res_offsets) {
789
19
        const size_t offset_size = str_offsets.size();
790
19
        res_offsets.resize(offset_size);
791
19
        res_data.reserve(str_data.size());
792
793
19
        std::unordered_set<std::string_view> char_lookup;
794
19
        const char* remove_begin = remove_str.data;
795
19
        const char* remove_end = remove_str.data + remove_str.size;
796
797
84
        while (remove_begin < remove_end) {
798
65
            size_t byte_len, char_len;
799
65
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
800
65
                    remove_begin, remove_end, 1);
801
65
            char_lookup.insert(std::string_view(remove_begin, byte_len));
802
65
            remove_begin += byte_len;
803
65
        }
804
805
52
        for (size_t i = 0; i < offset_size; ++i) {
806
33
            const char* str_begin =
807
33
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
808
33
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
809
33
            const char* left_trim_pos = str_begin;
810
33
            const char* right_trim_pos = str_end;
811
812
33
            if constexpr (is_ltrim) {
813
45
                while (left_trim_pos < str_end) {
814
41
                    size_t byte_len, char_len;
815
41
                    std::tie(byte_len, char_len) =
816
41
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
817
41
                                                                                   str_end, 1);
818
41
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
819
41
                        char_lookup.end()) {
820
29
                        break;
821
29
                    }
822
12
                    left_trim_pos += byte_len;
823
12
                }
824
33
            }
825
826
33
            if constexpr (is_rtrim) {
827
48
                while (right_trim_pos > left_trim_pos) {
828
44
                    const char* prev_char_pos = right_trim_pos;
829
90
                    do {
830
90
                        --prev_char_pos;
831
90
                    } while ((*prev_char_pos & 0xC0) == 0x80);
832
44
                    size_t byte_len = right_trim_pos - prev_char_pos;
833
44
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
834
44
                        char_lookup.end()) {
835
29
                        break;
836
29
                    }
837
15
                    right_trim_pos = prev_char_pos;
838
15
                }
839
33
            }
840
841
33
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
842
            // The length of the result of the trim function will never exceed the length of the input.
843
33
            res_offsets[i] = (ColumnString::Offset)res_data.size();
844
33
        }
845
19
        return Status::OK();
846
19
    }
_ZN5doris10TrimInUtilILb1ELb0ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
788
17
                                    ColumnString::Offsets& res_offsets) {
789
17
        const size_t offset_size = str_offsets.size();
790
17
        res_offsets.resize(offset_size);
791
17
        res_data.reserve(str_data.size());
792
793
17
        std::unordered_set<std::string_view> char_lookup;
794
17
        const char* remove_begin = remove_str.data;
795
17
        const char* remove_end = remove_str.data + remove_str.size;
796
797
78
        while (remove_begin < remove_end) {
798
61
            size_t byte_len, char_len;
799
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
800
61
                    remove_begin, remove_end, 1);
801
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
802
61
            remove_begin += byte_len;
803
61
        }
804
805
44
        for (size_t i = 0; i < offset_size; ++i) {
806
27
            const char* str_begin =
807
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
808
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
809
27
            const char* left_trim_pos = str_begin;
810
27
            const char* right_trim_pos = str_end;
811
812
27
            if constexpr (is_ltrim) {
813
36
                while (left_trim_pos < str_end) {
814
32
                    size_t byte_len, char_len;
815
32
                    std::tie(byte_len, char_len) =
816
32
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
817
32
                                                                                   str_end, 1);
818
32
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
819
32
                        char_lookup.end()) {
820
23
                        break;
821
23
                    }
822
9
                    left_trim_pos += byte_len;
823
9
                }
824
27
            }
825
826
            if constexpr (is_rtrim) {
827
                while (right_trim_pos > left_trim_pos) {
828
                    const char* prev_char_pos = right_trim_pos;
829
                    do {
830
                        --prev_char_pos;
831
                    } while ((*prev_char_pos & 0xC0) == 0x80);
832
                    size_t byte_len = right_trim_pos - prev_char_pos;
833
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
834
                        char_lookup.end()) {
835
                        break;
836
                    }
837
                    right_trim_pos = prev_char_pos;
838
                }
839
            }
840
841
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
842
            // The length of the result of the trim function will never exceed the length of the input.
843
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
844
27
        }
845
17
        return Status::OK();
846
17
    }
_ZN5doris10TrimInUtilILb0ELb1ELb0EE17impl_vectors_utf8ERKNS_8PODArrayIhLm4096ENS_9AllocatorILb0ELb0ELb0ENS_22DefaultMemoryAllocatorELb1EEELm16ELm15EEERKNS2_IjLm4096ES5_Lm16ELm15EEERKNS_9StringRefERS6_RS9_
Line
Count
Source
788
17
                                    ColumnString::Offsets& res_offsets) {
789
17
        const size_t offset_size = str_offsets.size();
790
17
        res_offsets.resize(offset_size);
791
17
        res_data.reserve(str_data.size());
792
793
17
        std::unordered_set<std::string_view> char_lookup;
794
17
        const char* remove_begin = remove_str.data;
795
17
        const char* remove_end = remove_str.data + remove_str.size;
796
797
78
        while (remove_begin < remove_end) {
798
61
            size_t byte_len, char_len;
799
61
            std::tie(byte_len, char_len) = simd::VStringFunctions::iterate_utf8_with_limit_length(
800
61
                    remove_begin, remove_end, 1);
801
61
            char_lookup.insert(std::string_view(remove_begin, byte_len));
802
61
            remove_begin += byte_len;
803
61
        }
804
805
44
        for (size_t i = 0; i < offset_size; ++i) {
806
27
            const char* str_begin =
807
27
                    reinterpret_cast<const char*>(str_data.data() + str_offsets[i - 1]);
808
27
            const char* str_end = reinterpret_cast<const char*>(str_data.data() + str_offsets[i]);
809
27
            const char* left_trim_pos = str_begin;
810
27
            const char* right_trim_pos = str_end;
811
812
            if constexpr (is_ltrim) {
813
                while (left_trim_pos < str_end) {
814
                    size_t byte_len, char_len;
815
                    std::tie(byte_len, char_len) =
816
                            simd::VStringFunctions::iterate_utf8_with_limit_length(left_trim_pos,
817
                                                                                   str_end, 1);
818
                    if (char_lookup.find(std::string_view(left_trim_pos, byte_len)) ==
819
                        char_lookup.end()) {
820
                        break;
821
                    }
822
                    left_trim_pos += byte_len;
823
                }
824
            }
825
826
27
            if constexpr (is_rtrim) {
827
40
                while (right_trim_pos > left_trim_pos) {
828
36
                    const char* prev_char_pos = right_trim_pos;
829
66
                    do {
830
66
                        --prev_char_pos;
831
66
                    } while ((*prev_char_pos & 0xC0) == 0x80);
832
36
                    size_t byte_len = right_trim_pos - prev_char_pos;
833
36
                    if (char_lookup.find(std::string_view(prev_char_pos, byte_len)) ==
834
36
                        char_lookup.end()) {
835
23
                        break;
836
23
                    }
837
13
                    right_trim_pos = prev_char_pos;
838
13
                }
839
27
            }
840
841
27
            res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
842
            // The length of the result of the trim function will never exceed the length of the input.
843
27
            res_offsets[i] = (ColumnString::Offset)res_data.size();
844
27
        }
845
17
        return Status::OK();
846
17
    }
847
};
848
// This is an implementation of a parameter for the Trim function.
849
template <bool is_ltrim, bool is_rtrim, typename Name>
850
struct Trim1Impl {
851
    static constexpr auto name = Name::name;
852
853
122
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
853
40
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
853
24
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
853
24
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
853
8
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
853
13
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
853
13
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
854
855
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
856
108
                          uint32_t result, size_t input_rows_count) {
857
108
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
108
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
108
            auto col_res = ColumnString::create();
860
108
            char blank[] = " ";
861
108
            const StringRef remove_str(blank, 1);
862
108
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
108
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
108
                    col_res->get_offsets())));
865
108
            block.replace_by_position(result, std::move(col_res));
866
108
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
108
        return Status::OK();
872
108
    }
_ZN5doris9Trim1ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
856
44
                          uint32_t result, size_t input_rows_count) {
857
44
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
44
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
44
            auto col_res = ColumnString::create();
860
44
            char blank[] = " ";
861
44
            const StringRef remove_str(blank, 1);
862
44
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
44
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
44
                    col_res->get_offsets())));
865
44
            block.replace_by_position(result, std::move(col_res));
866
44
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
44
        return Status::OK();
872
44
    }
_ZN5doris9Trim1ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
856
27
                          uint32_t result, size_t input_rows_count) {
857
27
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
27
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
27
            auto col_res = ColumnString::create();
860
27
            char blank[] = " ";
861
27
            const StringRef remove_str(blank, 1);
862
27
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
27
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
27
                    col_res->get_offsets())));
865
27
            block.replace_by_position(result, std::move(col_res));
866
27
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
27
        return Status::OK();
872
27
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
856
27
                          uint32_t result, size_t input_rows_count) {
857
27
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
27
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
27
            auto col_res = ColumnString::create();
860
27
            char blank[] = " ";
861
27
            const StringRef remove_str(blank, 1);
862
27
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
27
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
27
                    col_res->get_offsets())));
865
27
            block.replace_by_position(result, std::move(col_res));
866
27
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
27
        return Status::OK();
872
27
    }
Unexecuted instantiation: _ZN5doris9Trim1ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
_ZN5doris9Trim1ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
856
5
                          uint32_t result, size_t input_rows_count) {
857
5
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
5
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
5
            auto col_res = ColumnString::create();
860
5
            char blank[] = " ";
861
5
            const StringRef remove_str(blank, 1);
862
5
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
5
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
5
                    col_res->get_offsets())));
865
5
            block.replace_by_position(result, std::move(col_res));
866
5
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
5
        return Status::OK();
872
5
    }
_ZN5doris9Trim1ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
856
5
                          uint32_t result, size_t input_rows_count) {
857
5
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
858
5
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
859
5
            auto col_res = ColumnString::create();
860
5
            char blank[] = " ";
861
5
            const StringRef remove_str(blank, 1);
862
5
            RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
863
5
                    col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
864
5
                    col_res->get_offsets())));
865
5
            block.replace_by_position(result, std::move(col_res));
866
5
        } else {
867
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
868
0
                                        block.get_by_position(arguments[0]).column->get_name(),
869
0
                                        name);
870
0
        }
871
5
        return Status::OK();
872
5
    }
873
};
874
875
// This is an implementation of two parameters for the Trim function.
876
template <bool is_ltrim, bool is_rtrim, typename Name>
877
struct Trim2Impl {
878
    static constexpr auto name = Name::name;
879
880
117
    static DataTypes get_variadic_argument_types() {
881
117
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
117
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE27get_variadic_argument_typesEv
Line
Count
Source
880
13
    static DataTypes get_variadic_argument_types() {
881
13
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
13
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE27get_variadic_argument_typesEv
Line
Count
Source
880
12
    static DataTypes get_variadic_argument_types() {
881
12
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
12
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE27get_variadic_argument_typesEv
Line
Count
Source
880
12
    static DataTypes get_variadic_argument_types() {
881
12
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
12
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
880
21
    static DataTypes get_variadic_argument_types() {
881
21
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
21
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
880
29
    static DataTypes get_variadic_argument_types() {
881
29
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
29
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE27get_variadic_argument_typesEv
Line
Count
Source
880
30
    static DataTypes get_variadic_argument_types() {
881
30
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
882
30
    }
883
884
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
885
179
                          uint32_t result, size_t input_rows_count) {
886
179
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
179
        const auto& rcol =
888
179
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
179
                        ->get_data_column_ptr();
890
179
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
179
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
179
                auto col_res = ColumnString::create();
893
179
                const auto* remove_str_raw = col_right->get_chars().data();
894
179
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
179
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
179
                if (remove_str.size == 1) {
898
32
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
32
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
32
                            col_res->get_offsets())));
901
147
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
112
                                  std::is_same<Name, NameRTrimIn>::value) {
905
112
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
112
                                col->get_chars(), col->get_offsets(), remove_str,
907
112
                                col_res->get_chars(), col_res->get_offsets())));
908
112
                    } else {
909
35
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
35
                                col->get_chars(), col->get_offsets(), remove_str,
911
35
                                col_res->get_chars(), col_res->get_offsets())));
912
35
                    }
913
147
                }
914
179
                block.replace_by_position(result, std::move(col_res));
915
179
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
179
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
179
        return Status::OK();
927
179
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_8NameTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
19
                          uint32_t result, size_t input_rows_count) {
886
19
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
19
        const auto& rcol =
888
19
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
19
                        ->get_data_column_ptr();
890
19
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
19
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
19
                auto col_res = ColumnString::create();
893
19
                const auto* remove_str_raw = col_right->get_chars().data();
894
19
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
19
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
19
                if (remove_str.size == 1) {
898
0
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
0
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
0
                            col_res->get_offsets())));
901
19
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
                                  std::is_same<Name, NameRTrimIn>::value) {
905
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
                                col->get_chars(), col->get_offsets(), remove_str,
907
                                col_res->get_chars(), col_res->get_offsets())));
908
19
                    } else {
909
19
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
19
                                col->get_chars(), col->get_offsets(), remove_str,
911
19
                                col_res->get_chars(), col_res->get_offsets())));
912
19
                    }
913
19
                }
914
19
                block.replace_by_position(result, std::move(col_res));
915
19
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
19
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
19
        return Status::OK();
927
19
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_9NameLTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
16
                          uint32_t result, size_t input_rows_count) {
886
16
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
16
        const auto& rcol =
888
16
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
16
                        ->get_data_column_ptr();
890
16
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
16
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
16
                auto col_res = ColumnString::create();
893
16
                const auto* remove_str_raw = col_right->get_chars().data();
894
16
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
16
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
16
                if (remove_str.size == 1) {
898
0
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
0
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
0
                            col_res->get_offsets())));
901
16
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
                                  std::is_same<Name, NameRTrimIn>::value) {
905
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
                                col->get_chars(), col->get_offsets(), remove_str,
907
                                col_res->get_chars(), col_res->get_offsets())));
908
16
                    } else {
909
16
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
16
                                col->get_chars(), col->get_offsets(), remove_str,
911
16
                                col_res->get_chars(), col_res->get_offsets())));
912
16
                    }
913
16
                }
914
16
                block.replace_by_position(result, std::move(col_res));
915
16
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
16
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
16
        return Status::OK();
927
16
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_9NameRTrimEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
16
                          uint32_t result, size_t input_rows_count) {
886
16
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
16
        const auto& rcol =
888
16
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
16
                        ->get_data_column_ptr();
890
16
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
16
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
16
                auto col_res = ColumnString::create();
893
16
                const auto* remove_str_raw = col_right->get_chars().data();
894
16
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
16
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
16
                if (remove_str.size == 1) {
898
16
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
16
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
16
                            col_res->get_offsets())));
901
16
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
                                  std::is_same<Name, NameRTrimIn>::value) {
905
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
                                col->get_chars(), col->get_offsets(), remove_str,
907
                                col_res->get_chars(), col_res->get_offsets())));
908
0
                    } else {
909
0
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
0
                                col->get_chars(), col->get_offsets(), remove_str,
911
0
                                col_res->get_chars(), col_res->get_offsets())));
912
0
                    }
913
0
                }
914
16
                block.replace_by_position(result, std::move(col_res));
915
16
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
16
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
16
        return Status::OK();
927
16
    }
_ZN5doris9Trim2ImplILb1ELb1ENS_10NameTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
45
                          uint32_t result, size_t input_rows_count) {
886
45
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
45
        const auto& rcol =
888
45
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
45
                        ->get_data_column_ptr();
890
45
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
45
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
45
                auto col_res = ColumnString::create();
893
45
                const auto* remove_str_raw = col_right->get_chars().data();
894
45
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
45
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
45
                if (remove_str.size == 1) {
898
6
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
6
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
6
                            col_res->get_offsets())));
901
39
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
39
                                  std::is_same<Name, NameRTrimIn>::value) {
905
39
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
39
                                col->get_chars(), col->get_offsets(), remove_str,
907
39
                                col_res->get_chars(), col_res->get_offsets())));
908
                    } else {
909
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
                                col->get_chars(), col->get_offsets(), remove_str,
911
                                col_res->get_chars(), col_res->get_offsets())));
912
                    }
913
39
                }
914
45
                block.replace_by_position(result, std::move(col_res));
915
45
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
45
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
45
        return Status::OK();
927
45
    }
_ZN5doris9Trim2ImplILb1ELb0ENS_11NameLTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
41
                          uint32_t result, size_t input_rows_count) {
886
41
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
41
        const auto& rcol =
888
41
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
41
                        ->get_data_column_ptr();
890
41
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
41
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
41
                auto col_res = ColumnString::create();
893
41
                const auto* remove_str_raw = col_right->get_chars().data();
894
41
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
41
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
41
                if (remove_str.size == 1) {
898
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
5
                            col_res->get_offsets())));
901
36
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
36
                                  std::is_same<Name, NameRTrimIn>::value) {
905
36
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
36
                                col->get_chars(), col->get_offsets(), remove_str,
907
36
                                col_res->get_chars(), col_res->get_offsets())));
908
                    } else {
909
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
                                col->get_chars(), col->get_offsets(), remove_str,
911
                                col_res->get_chars(), col_res->get_offsets())));
912
                    }
913
36
                }
914
41
                block.replace_by_position(result, std::move(col_res));
915
41
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
41
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
41
        return Status::OK();
927
41
    }
_ZN5doris9Trim2ImplILb0ELb1ENS_11NameRTrimInEE7executeEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
885
42
                          uint32_t result, size_t input_rows_count) {
886
42
        const ColumnPtr column = block.get_by_position(arguments[0]).column;
887
42
        const auto& rcol =
888
42
                assert_cast<const ColumnConst*>(block.get_by_position(arguments[1]).column.get())
889
42
                        ->get_data_column_ptr();
890
42
        if (const auto* col = assert_cast<const ColumnString*>(column.get())) {
891
42
            if (const auto* col_right = assert_cast<const ColumnString*>(rcol.get())) {
892
42
                auto col_res = ColumnString::create();
893
42
                const auto* remove_str_raw = col_right->get_chars().data();
894
42
                const ColumnString::Offset remove_str_size = col_right->get_offsets()[0];
895
42
                const StringRef remove_str(remove_str_raw, remove_str_size);
896
897
42
                if (remove_str.size == 1) {
898
5
                    RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, true>::vector(
899
5
                            col->get_chars(), col->get_offsets(), remove_str, col_res->get_chars(),
900
5
                            col_res->get_offsets())));
901
37
                } else {
902
                    if constexpr (std::is_same<Name, NameTrimIn>::value ||
903
                                  std::is_same<Name, NameLTrimIn>::value ||
904
37
                                  std::is_same<Name, NameRTrimIn>::value) {
905
37
                        RETURN_IF_ERROR((TrimInUtil<is_ltrim, is_rtrim, false>::vector(
906
37
                                col->get_chars(), col->get_offsets(), remove_str,
907
37
                                col_res->get_chars(), col_res->get_offsets())));
908
                    } else {
909
                        RETURN_IF_ERROR((TrimUtil<is_ltrim, is_rtrim, false>::vector(
910
                                col->get_chars(), col->get_offsets(), remove_str,
911
                                col_res->get_chars(), col_res->get_offsets())));
912
                    }
913
37
                }
914
42
                block.replace_by_position(result, std::move(col_res));
915
42
            } else {
916
0
                return Status::RuntimeError("Illegal column {} of argument of function {}",
917
0
                                            block.get_by_position(arguments[1]).column->get_name(),
918
0
                                            name);
919
0
            }
920
921
42
        } else {
922
0
            return Status::RuntimeError("Illegal column {} of argument of function {}",
923
0
                                        block.get_by_position(arguments[0]).column->get_name(),
924
0
                                        name);
925
0
        }
926
42
        return Status::OK();
927
42
    }
928
};
929
930
template <typename impl>
931
class FunctionTrim : public IFunction {
932
public:
933
    static constexpr auto name = impl::name;
934
251
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
934
41
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
934
25
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
934
25
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE6createEv
Line
Count
Source
934
14
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE6createEv
Line
Count
Source
934
13
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE6createEv
Line
Count
Source
934
13
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
934
9
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
934
14
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
934
14
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE6createEv
Line
Count
Source
934
22
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE6createEv
Line
Count
Source
934
30
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
_ZN5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE6createEv
Line
Count
Source
934
31
    static FunctionPtr create() { return std::make_shared<FunctionTrim<impl>>(); }
935
12
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE8get_nameB5cxx11Ev
Line
Count
Source
935
1
    String get_name() const override { return impl::name; }
936
937
143
    size_t get_number_of_arguments() const override {
938
143
        return get_variadic_argument_types_impl().size();
939
143
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
32
    size_t get_number_of_arguments() const override {
938
32
        return get_variadic_argument_types_impl().size();
939
32
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
16
    size_t get_number_of_arguments() const override {
938
16
        return get_variadic_argument_types_impl().size();
939
16
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
16
    size_t get_number_of_arguments() const override {
938
16
        return get_variadic_argument_types_impl().size();
939
16
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
5
    size_t get_number_of_arguments() const override {
938
5
        return get_variadic_argument_types_impl().size();
939
5
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
4
    size_t get_number_of_arguments() const override {
938
4
        return get_variadic_argument_types_impl().size();
939
4
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE23get_number_of_argumentsEv
Line
Count
Source
937
4
    size_t get_number_of_arguments() const override {
938
4
        return get_variadic_argument_types_impl().size();
939
4
    }
Unexecuted instantiation: _ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
937
5
    size_t get_number_of_arguments() const override {
938
5
        return get_variadic_argument_types_impl().size();
939
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
937
5
    size_t get_number_of_arguments() const override {
938
5
        return get_variadic_argument_types_impl().size();
939
5
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
937
13
    size_t get_number_of_arguments() const override {
938
13
        return get_variadic_argument_types_impl().size();
939
13
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
937
21
    size_t get_number_of_arguments() const override {
938
21
        return get_variadic_argument_types_impl().size();
939
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE23get_number_of_argumentsEv
Line
Count
Source
937
22
    size_t get_number_of_arguments() const override {
938
22
        return get_variadic_argument_types_impl().size();
939
22
    }
940
941
143
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
143
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
143
        return arguments[0];
948
143
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
32
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
32
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
32
        return arguments[0];
948
32
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
16
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
16
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
16
        return arguments[0];
948
16
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
16
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
16
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
16
        return arguments[0];
948
16
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
5
        return arguments[0];
948
5
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
4
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
4
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
4
        return arguments[0];
948
4
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
4
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
4
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
4
        return arguments[0];
948
4
    }
Unexecuted instantiation: _ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
5
        return arguments[0];
948
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
5
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
5
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
5
        return arguments[0];
948
5
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
13
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
13
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
13
        return arguments[0];
948
13
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
21
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
21
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
21
        return arguments[0];
948
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE20get_return_type_implERKSt6vectorISt10shared_ptrIKNS_9IDataTypeEESaIS9_EE
Line
Count
Source
941
22
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
942
22
        if (!is_string_type(arguments[0]->get_primitive_type())) {
943
0
            throw doris::Exception(ErrorCode::INVALID_ARGUMENT,
944
0
                                   "Illegal type {} of argument of function {}",
945
0
                                   arguments[0]->get_name(), get_name());
946
0
        }
947
22
        return arguments[0];
948
22
    }
949
    // The second parameter of "trim" is a constant.
950
426
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
80
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
47
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
47
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
30
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
24
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
24
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
Unexecuted instantiation: _ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
5
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
5
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
61
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
51
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE38get_arguments_that_are_always_constantEv
Line
Count
Source
950
52
    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
951
952
239
    DataTypes get_variadic_argument_types_impl() const override {
953
239
        return impl::get_variadic_argument_types();
954
239
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
40
    DataTypes get_variadic_argument_types_impl() const override {
953
40
        return impl::get_variadic_argument_types();
954
40
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
24
    DataTypes get_variadic_argument_types_impl() const override {
953
24
        return impl::get_variadic_argument_types();
954
24
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
24
    DataTypes get_variadic_argument_types_impl() const override {
953
24
        return impl::get_variadic_argument_types();
954
24
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
13
    DataTypes get_variadic_argument_types_impl() const override {
953
13
        return impl::get_variadic_argument_types();
954
13
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
12
    DataTypes get_variadic_argument_types_impl() const override {
953
12
        return impl::get_variadic_argument_types();
954
12
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
12
    DataTypes get_variadic_argument_types_impl() const override {
953
12
        return impl::get_variadic_argument_types();
954
12
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
8
    DataTypes get_variadic_argument_types_impl() const override {
953
8
        return impl::get_variadic_argument_types();
954
8
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
13
    DataTypes get_variadic_argument_types_impl() const override {
953
13
        return impl::get_variadic_argument_types();
954
13
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
13
    DataTypes get_variadic_argument_types_impl() const override {
953
13
        return impl::get_variadic_argument_types();
954
13
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
21
    DataTypes get_variadic_argument_types_impl() const override {
953
21
        return impl::get_variadic_argument_types();
954
21
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
29
    DataTypes get_variadic_argument_types_impl() const override {
953
29
        return impl::get_variadic_argument_types();
954
29
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE32get_variadic_argument_types_implEv
Line
Count
Source
952
30
    DataTypes get_variadic_argument_types_impl() const override {
953
30
        return impl::get_variadic_argument_types();
954
30
    }
955
956
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
957
287
                        uint32_t result, size_t input_rows_count) const override {
958
287
        return impl::execute(context, block, arguments, result, input_rows_count);
959
287
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
44
                        uint32_t result, size_t input_rows_count) const override {
958
44
        return impl::execute(context, block, arguments, result, input_rows_count);
959
44
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
27
                        uint32_t result, size_t input_rows_count) const override {
958
27
        return impl::execute(context, block, arguments, result, input_rows_count);
959
27
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
27
                        uint32_t result, size_t input_rows_count) const override {
958
27
        return impl::execute(context, block, arguments, result, input_rows_count);
959
27
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_8NameTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
19
                        uint32_t result, size_t input_rows_count) const override {
958
19
        return impl::execute(context, block, arguments, result, input_rows_count);
959
19
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_9NameLTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
16
                        uint32_t result, size_t input_rows_count) const override {
958
16
        return impl::execute(context, block, arguments, result, input_rows_count);
959
16
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_9NameRTrimEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
16
                        uint32_t result, size_t input_rows_count) const override {
958
16
        return impl::execute(context, block, arguments, result, input_rows_count);
959
16
    }
Unexecuted instantiation: _ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
5
                        uint32_t result, size_t input_rows_count) const override {
958
5
        return impl::execute(context, block, arguments, result, input_rows_count);
959
5
    }
_ZNK5doris12FunctionTrimINS_9Trim1ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
5
                        uint32_t result, size_t input_rows_count) const override {
958
5
        return impl::execute(context, block, arguments, result, input_rows_count);
959
5
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb1ENS_10NameTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
45
                        uint32_t result, size_t input_rows_count) const override {
958
45
        return impl::execute(context, block, arguments, result, input_rows_count);
959
45
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb1ELb0ENS_11NameLTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
41
                        uint32_t result, size_t input_rows_count) const override {
958
41
        return impl::execute(context, block, arguments, result, input_rows_count);
959
41
    }
_ZNK5doris12FunctionTrimINS_9Trim2ImplILb0ELb1ENS_11NameRTrimInEEEE12execute_implEPNS_15FunctionContextERNS_5BlockERKSt6vectorIjSaIjEEjm
Line
Count
Source
957
42
                        uint32_t result, size_t input_rows_count) const override {
958
42
        return impl::execute(context, block, arguments, result, input_rows_count);
959
42
    }
960
};
961
962
/// Name tag carrying the function name "unhex"; used as the `Name` argument of UnHexImpl.
struct UnHexImplEmpty {
    static constexpr const char* name = "unhex";
};
965
966
/// Name tag carrying the function name "unhex_null"; used as the `Name` argument of UnHexImpl.
struct UnHexImplNull {
    static constexpr const char* name = "unhex_null";
};
969
970
template <typename Name>
971
struct UnHexImpl {
972
    static constexpr auto name = Name::name;
973
    using ReturnType = DataTypeString;
974
    using ColumnType = ColumnString;
975
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
976
977
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
978
155
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
979
155
        auto rows_count = offsets.size();
980
155
        dst_offsets.resize(rows_count);
981
982
155
        int64_t total_size = 0;
983
358
        for (size_t i = 0; i < rows_count; i++) {
984
203
            size_t len = offsets[i] - offsets[i - 1];
985
203
            total_size += len / 2;
986
203
        }
987
155
        ColumnString::check_chars_length(total_size, rows_count);
988
155
        dst_data.resize(total_size);
989
155
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
990
155
        size_t offset = 0;
991
992
358
        for (int i = 0; i < rows_count; ++i) {
993
203
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
994
203
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
995
996
203
            if (UNLIKELY(srclen == 0)) {
997
14
                dst_offsets[i] = cast_set<uint32_t>(offset);
998
14
                continue;
999
14
            }
1000
1001
189
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1002
1003
189
            offset += outlen;
1004
189
            dst_offsets[i] = cast_set<uint32_t>(offset);
1005
189
        }
1006
155
        dst_data.pop_back(total_size - offset);
1007
155
        return Status::OK();
1008
155
    }
1009
1010
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1011
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1012
33
                         ColumnUInt8::Container* null_map_data) {
1013
33
        auto rows_count = offsets.size();
1014
33
        dst_offsets.resize(rows_count);
1015
1016
33
        int64_t total_size = 0;
1017
84
        for (size_t i = 0; i < rows_count; i++) {
1018
51
            size_t len = offsets[i] - offsets[i - 1];
1019
51
            total_size += len / 2;
1020
51
        }
1021
33
        ColumnString::check_chars_length(total_size, rows_count);
1022
33
        dst_data.resize(total_size);
1023
33
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1024
33
        size_t offset = 0;
1025
1026
84
        for (int i = 0; i < rows_count; ++i) {
1027
51
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1028
51
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1029
1030
51
            if (UNLIKELY(srclen == 0)) {
1031
7
                (*null_map_data)[i] = 1;
1032
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1033
7
                continue;
1034
7
            }
1035
1036
44
            int outlen = string_hex::hex_decode(source, srclen, dst_data_ptr + offset);
1037
1038
44
            if (outlen == 0) {
1039
13
                (*null_map_data)[i] = 1;
1040
13
                dst_offsets[i] = cast_set<uint32_t>(offset);
1041
13
                continue;
1042
13
            }
1043
1044
31
            offset += outlen;
1045
31
            dst_offsets[i] = cast_set<uint32_t>(offset);
1046
31
        }
1047
33
        dst_data.pop_back(total_size - offset);
1048
33
        return Status::OK();
1049
33
    }
1050
};
1051
1052
/// Name tag carrying the function name "space".
struct NameStringSpace {
    static constexpr const char* name = "space";
};
1055
1056
struct StringSpace {
1057
    using ReturnType = DataTypeString;
1058
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_INT;
1059
    using Type = Int32;
1060
    using ReturnColumnType = ColumnString;
1061
1062
    static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data,
1063
2
                         ColumnString::Offsets& res_offsets) {
1064
2
        res_offsets.resize(data.size());
1065
2
        size_t input_size = res_offsets.size();
1066
2
        int64_t total_size = 0;
1067
18
        for (size_t i = 0; i < input_size; ++i) {
1068
16
            if (data[i] > 0) {
1069
10
                total_size += data[i];
1070
10
            }
1071
16
        }
1072
2
        ColumnString::check_chars_length(total_size, input_size);
1073
2
        res_data.reserve(total_size);
1074
1075
18
        for (size_t i = 0; i < input_size; ++i) {
1076
16
            if (data[i] > 0) [[likely]] {
1077
10
                res_data.resize_fill(res_data.size() + data[i], ' ');
1078
10
                cast_set(res_offsets[i], res_data.size());
1079
10
            } else {
1080
6
                StringOP::push_empty_string(i, res_data, res_offsets);
1081
6
            }
1082
16
        }
1083
2
        return Status::OK();
1084
2
    }
1085
};
1086
1087
struct ToBase64Impl {
1088
    static constexpr auto name = "to_base64";
1089
    using ReturnType = DataTypeString;
1090
    using ColumnType = ColumnString;
1091
    static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_STRING;
1092
1093
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1094
116
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
1095
116
        auto rows_count = offsets.size();
1096
116
        dst_offsets.resize(rows_count);
1097
1098
116
        size_t total_size = 0;
1099
268
        for (size_t i = 0; i < rows_count; i++) {
1100
152
            size_t len = offsets[i] - offsets[i - 1];
1101
152
            total_size += 4 * ((len + 2) / 3);
1102
152
        }
1103
116
        ColumnString::check_chars_length(total_size, rows_count);
1104
116
        dst_data.resize(total_size);
1105
116
        auto* dst_data_ptr = dst_data.data();
1106
116
        size_t offset = 0;
1107
1108
268
        for (int i = 0; i < rows_count; ++i) {
1109
152
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1110
152
            size_t srclen = offsets[i] - offsets[i - 1];
1111
1112
152
            if (UNLIKELY(srclen == 0)) {
1113
7
                dst_offsets[i] = cast_set<uint32_t>(offset);
1114
7
                continue;
1115
7
            }
1116
1117
145
            auto outlen = doris::base64_encode((const unsigned char*)source, srclen,
1118
145
                                               (unsigned char*)(dst_data_ptr + offset));
1119
1120
145
            offset += outlen;
1121
145
            dst_offsets[i] = cast_set<uint32_t>(offset);
1122
145
        }
1123
116
        dst_data.pop_back(total_size - offset);
1124
116
        return Status::OK();
1125
116
    }
1126
};
1127
1128
struct FromBase64Impl {
1129
    static constexpr auto name = "from_base64";
1130
    using ReturnType = DataTypeString;
1131
    using ColumnType = ColumnString;
1132
1133
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
1134
                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
1135
118
                         NullMap& null_map) {
1136
118
        auto rows_count = offsets.size();
1137
118
        dst_offsets.resize(rows_count);
1138
1139
118
        size_t total_size = 0;
1140
289
        for (size_t i = 0; i < rows_count; i++) {
1141
171
            auto len = offsets[i] - offsets[i - 1];
1142
171
            total_size += len / 4 * 3;
1143
171
        }
1144
118
        ColumnString::check_chars_length(total_size, rows_count);
1145
118
        dst_data.resize(total_size);
1146
118
        char* dst_data_ptr = reinterpret_cast<char*>(dst_data.data());
1147
118
        size_t offset = 0;
1148
1149
289
        for (int i = 0; i < rows_count; ++i) {
1150
171
            if (UNLIKELY(null_map[i])) {
1151
0
                null_map[i] = 1;
1152
0
                dst_offsets[i] = cast_set<uint32_t>(offset);
1153
0
                continue;
1154
0
            }
1155
1156
171
            const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
1157
171
            ColumnString::Offset srclen = offsets[i] - offsets[i - 1];
1158
1159
171
            if (UNLIKELY(srclen == 0)) {
1160
6
                dst_offsets[i] = cast_set<uint32_t>(offset);
1161
6
                continue;
1162
6
            }
1163
1164
165
            auto outlen = base64_decode(source, srclen, dst_data_ptr + offset);
1165
1166
165
            if (outlen < 0) {
1167
60
                null_map[i] = 1;
1168
60
                dst_offsets[i] = cast_set<uint32_t>(offset);
1169
105
            } else {
1170
105
                offset += outlen;
1171
105
                dst_offsets[i] = cast_set<uint32_t>(offset);
1172
105
            }
1173
165
        }
1174
118
        dst_data.pop_back(total_size - offset);
1175
118
        return Status::OK();
1176
118
    }
1177
};
1178
1179
struct StringAppendTrailingCharIfAbsent {
1180
    static constexpr auto name = "append_trailing_char_if_absent";
1181
    using Chars = ColumnString::Chars;
1182
    using Offsets = ColumnString::Offsets;
1183
    using ReturnType = DataTypeString;
1184
    using ColumnType = ColumnString;
1185
1186
24
    static bool str_end_with(const StringRef& str, const StringRef& end) {
1187
24
        if (str.size < end.size) {
1188
8
            return false;
1189
8
        }
1190
        // The end_with method of StringRef needs to ensure that the size of end is less than or equal to the size of str.
1191
16
        return str.end_with(end);
1192
24
    }
1193
1194
    static void vector_vector(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1195
                              const Chars& rdata, const Offsets& roffsets, Chars& res_data,
1196
25
                              Offsets& res_offsets, NullMap& null_map_data) {
1197
25
        DCHECK_EQ(loffsets.size(), roffsets.size());
1198
25
        size_t input_rows_count = loffsets.size();
1199
25
        res_offsets.resize(input_rows_count);
1200
25
        fmt::memory_buffer buffer;
1201
1202
96
        for (size_t i = 0; i < input_rows_count; ++i) {
1203
71
            buffer.clear();
1204
1205
71
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1206
71
                                       loffsets[i] - loffsets[i - 1]);
1207
71
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1208
71
                                       roffsets[i] - roffsets[i - 1]);
1209
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1210
71
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1211
71
                    rstr.begin(), rstr.end(), 2);
1212
1213
71
            if (char_len != 1) {
1214
59
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1215
59
                continue;
1216
59
            }
1217
12
            if (str_end_with(lstr, rstr)) {
1218
4
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1219
4
                continue;
1220
4
            }
1221
1222
8
            buffer.append(lstr.begin(), lstr.end());
1223
8
            buffer.append(rstr.begin(), rstr.end());
1224
8
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1225
8
                                        res_offsets);
1226
8
        }
1227
25
    }
1228
    static void vector_scalar(FunctionContext* context, const Chars& ldata, const Offsets& loffsets,
1229
                              const StringRef& rstr, Chars& res_data, Offsets& res_offsets,
1230
8
                              NullMap& null_map_data) {
1231
8
        size_t input_rows_count = loffsets.size();
1232
8
        res_offsets.resize(input_rows_count);
1233
8
        fmt::memory_buffer buffer;
1234
        // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1235
8
        auto [byte_len, char_len] =
1236
8
                simd::VStringFunctions::iterate_utf8_with_limit_length(rstr.begin(), rstr.end(), 2);
1237
8
        if (char_len != 1) {
1238
4
            for (size_t i = 0; i < input_rows_count; ++i) {
1239
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1240
2
            }
1241
2
            return;
1242
2
        }
1243
1244
12
        for (size_t i = 0; i < input_rows_count; ++i) {
1245
6
            buffer.clear();
1246
6
            StringRef lstr = StringRef(reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]),
1247
6
                                       loffsets[i] - loffsets[i - 1]);
1248
1249
6
            if (str_end_with(lstr, rstr)) {
1250
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1251
2
                continue;
1252
2
            }
1253
1254
4
            buffer.append(lstr.begin(), lstr.end());
1255
4
            buffer.append(rstr.begin(), rstr.end());
1256
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1257
4
                                        res_offsets);
1258
4
        }
1259
6
    }
1260
    static void scalar_vector(FunctionContext* context, const StringRef& lstr, const Chars& rdata,
1261
                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
1262
8
                              NullMap& null_map_data) {
1263
8
        size_t input_rows_count = roffsets.size();
1264
8
        res_offsets.resize(input_rows_count);
1265
8
        fmt::memory_buffer buffer;
1266
1267
16
        for (size_t i = 0; i < input_rows_count; ++i) {
1268
8
            buffer.clear();
1269
1270
8
            StringRef rstr = StringRef(reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]),
1271
8
                                       roffsets[i] - roffsets[i - 1]);
1272
            // The iterate_utf8_with_limit_length function iterates over a maximum of two UTF-8 characters.
1273
8
            auto [byte_len, char_len] = simd::VStringFunctions::iterate_utf8_with_limit_length(
1274
8
                    rstr.begin(), rstr.end(), 2);
1275
1276
8
            if (char_len != 1) {
1277
2
                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
1278
2
                continue;
1279
2
            }
1280
6
            if (str_end_with(lstr, rstr)) {
1281
2
                StringOP::push_value_string(lstr, i, res_data, res_offsets);
1282
2
                continue;
1283
2
            }
1284
1285
4
            buffer.append(lstr.begin(), lstr.end());
1286
4
            buffer.append(rstr.begin(), rstr.end());
1287
4
            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
1288
4
                                        res_offsets);
1289
4
        }
1290
8
    }
1291
};
1292
1293
/// Policy tag for FunctionStringPad: function name "lpad", with `is_lpad` set to true.
struct StringLPad {
    static constexpr const char* name = "lpad";
    static constexpr bool is_lpad = true;
};
1297
1298
/// Policy tag for FunctionStringPad: function name "rpad", with `is_lpad` set to false.
struct StringRPad {
    static constexpr const char* name = "rpad";
    static constexpr bool is_lpad = false;
};
1302
1303
// Two-parameter adapters over StringFunctionImpl: each one fixes the operation
// (StartsWithOp / EndsWithOp / FindInSetOp) and leaves only the left/right data types open,
// which is the shape the FunctionBinaryToType aliases below take as their third argument.

// StringFunctionImpl bound to StartsWithOp.
template <typename LeftDataType, typename RightDataType>
1304
using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;
1305
1306
// StringFunctionImpl bound to EndsWithOp.
template <typename LeftDataType, typename RightDataType>
1307
using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;
1308
1309
// StringFunctionImpl bound to FindInSetOp.
template <typename LeftDataType, typename RightDataType>
1310
using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
1311
1312
// ready for regist function
1313
// Unary functions: each alias pairs an implementation struct with its name tag
// through FunctionUnaryToType.
using FunctionStringParseDataSize = FunctionUnaryToType<ParseDataSize, NameParseDataSize>;
1314
using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
1315
using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLength>;
1316
using FunctionCrc32 = FunctionUnaryToType<Crc32Impl, NameCrc32>;
1317
using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
1318
using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
1319
// Binary functions: both arguments are DataTypeString; the third template
// argument is the implementation template and the fourth is the name tag.
using FunctionStringStartsWith =
1320
        FunctionBinaryToType<DataTypeString, DataTypeString, StringStartsWithImpl, NameStartsWith>;
1321
using FunctionStringEndsWith =
1322
        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
1323
using FunctionStringInstr =
1324
        FunctionBinaryToType<DataTypeString, DataTypeString, StringInStrImpl, NameInstr>;
1325
using FunctionStringLocate =
1326
        FunctionBinaryToType<DataTypeString, DataTypeString, StringLocateImpl, NameLocate>;
1327
using FunctionStringFindInSet =
1328
        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;
1329
1330
// String-to-string functions: FunctionStringToString pairs an implementation
// with its name tag.
using FunctionQuote = FunctionStringToString<NameQuoteImpl, NameQuote>;
1331
1332
// Lower/upper casing share TransferImpl, parameterised by the name tag.
using FunctionToLower = FunctionStringToString<TransferImpl<NameToLower>, NameToLower>;
1333
1334
using FunctionToUpper = FunctionStringToString<TransferImpl<NameToUpper>, NameToUpper>;
1335
1336
using FunctionToInitcap = FunctionStringToString<InitcapImpl, NameToInitcap>;
1337
1338
// Two unhex variants: they differ in the UnHexImpl policy (UnHexImplEmpty vs
// UnHexImplNull) and in the boolean argument of FunctionStringEncode.
// NOTE(review): presumably the boolean marks a nullable result — confirm in
// FunctionStringEncode, which is defined outside this view.
using FunctionUnHex = FunctionStringEncode<UnHexImpl<UnHexImplEmpty>, false>;
1339
using FunctionUnHexNullable = FunctionStringEncode<UnHexImpl<UnHexImplNull>, true>;
1340
using FunctionToBase64 = FunctionStringEncode<ToBase64Impl, false>;
1341
using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
1342
1343
// Wraps the StringAppendTrailingCharIfAbsent implementation struct defined
// earlier in this file.
using FunctionStringAppendTrailingCharIfAbsent =
1344
        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;
1345
1346
// Both pad functions share FunctionStringPad and differ only in the tag type.
using FunctionStringLPad = FunctionStringPad<StringLPad>;
1347
using FunctionStringRPad = FunctionStringPad<StringRPad>;
1348
1349
using FunctionMakeSet = FunctionNeedsToHandleNull<MakeSetImpl, PrimitiveType::TYPE_STRING>;
1350
1351
8
// Registers the string function types of this file with `factory` via
// register_function<T>(), then registers additional names for some of them
// via register_alias().
//
// @param factory  Function factory that receives the registrations.
void register_function_string(SimpleFunctionFactory& factory) {
1352
8
    factory.register_function<FunctionStringParseDataSize>();
1353
8
    factory.register_function<FunctionStringASCII>();
1354
8
    factory.register_function<FunctionStringLength>();
1355
8
    factory.register_function<FunctionCrc32>();
1356
8
    factory.register_function<FunctionStringUTF8Length>();
1357
8
    factory.register_function<FunctionStringSpace>();
1358
8
    factory.register_function<FunctionStringStartsWith>();
1359
8
    factory.register_function<FunctionStringEndsWith>();
1360
8
    factory.register_function<FunctionStringInstr>();
1361
8
    factory.register_function<FunctionStringFindInSet>();
1362
8
    factory.register_function<FunctionStringLocate>();
1363
8
    factory.register_function<FunctionStringLocatePos>();
1364
8
    factory.register_function<FunctionQuote>();
1365
8
    factory.register_function<FunctionAutoPartitionName>();
1366
8
    factory.register_function<FunctionReverseCommon>();
1367
8
    factory.register_function<FunctionUnHex>();
1368
8
    factory.register_function<FunctionUnHexNullable>();
1369
8
    factory.register_function<FunctionToLower>();
1370
8
    factory.register_function<FunctionToUpper>();
1371
8
    factory.register_function<FunctionToInitcap>();
1372
8
    // Trim family: Trim1Impl and Trim2Impl, each instantiated for the
    // Trim/LTrim/RTrim name tags and for their *In counterparts.
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrim>>>();
1373
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrim>>>();
1374
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrim>>>();
1375
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrim>>>();
1376
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrim>>>();
1377
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrim>>>();
1378
8
    factory.register_function<FunctionTrim<Trim1Impl<true, true, NameTrimIn>>>();
1379
8
    factory.register_function<FunctionTrim<Trim1Impl<true, false, NameLTrimIn>>>();
1380
8
    factory.register_function<FunctionTrim<Trim1Impl<false, true, NameRTrimIn>>>();
1381
8
    factory.register_function<FunctionTrim<Trim2Impl<true, true, NameTrimIn>>>();
1382
8
    factory.register_function<FunctionTrim<Trim2Impl<true, false, NameLTrimIn>>>();
1383
8
    factory.register_function<FunctionTrim<Trim2Impl<false, true, NameRTrimIn>>>();
1384
8
    factory.register_function<FunctionConvertTo>();
1385
8
    factory.register_function<FunctionSubstring<Substr3Impl>>();
1386
8
    factory.register_function<FunctionSubstring<Substr2Impl>>();
1387
8
    factory.register_function<FunctionLeft>();
1388
8
    factory.register_function<FunctionRight>();
1389
8
    factory.register_function<FunctionNullOrEmpty>();
1390
8
    factory.register_function<FunctionNotNullOrEmpty>();
1391
8
    factory.register_function<FunctionStringConcat>();
1392
8
    factory.register_function<FunctionIntToChar>();
1393
8
    factory.register_function<FunctionStringElt>();
1394
8
    factory.register_function<FunctionStringConcatWs>();
1395
8
    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
1396
8
    factory.register_function<FunctionStringRepeat>();
1397
8
    factory.register_function<FunctionStringLPad>();
1398
8
    factory.register_function<FunctionStringRPad>();
1399
8
    factory.register_function<FunctionToBase64>();
1400
8
    factory.register_function<FunctionFromBase64>();
1401
8
    factory.register_function<FunctionSplitPart>();
1402
8
    factory.register_function<FunctionSplitByString>();
1403
8
    factory.register_function<FunctionCountSubString<FunctionCountSubStringType::TWO_ARGUMENTS>>();
1404
8
    factory.register_function<
1405
8
            FunctionCountSubString<FunctionCountSubStringType::THREE_ARGUMENTS>>();
1406
8
    factory.register_function<FunctionSubstringIndex>();
1407
8
    factory.register_function<FunctionExtractURLParameter>();
1408
8
    factory.register_function<FunctionStringParseUrl>();
1409
8
    factory.register_function<FunctionUrlDecode>();
1410
8
    factory.register_function<FunctionUrlEncode>();
1411
8
    factory.register_function<FunctionRandomBytes>();
1412
8
    // FunctionMoneyFormat: one instantiation per implementation (double,
    // Int64, Int128, and five decimal primitive types).
    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
1413
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
1414
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
1415
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMALV2>>>();
1416
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL32>>>();
1417
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL64>>>();
1418
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL128I>>>();
1419
8
    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl<TYPE_DECIMAL256>>>();
1420
8
    // FunctionStringFormatRound: same set of implementation types as
    // FunctionMoneyFormat above.
    factory.register_function<FunctionStringFormatRound<FormatRoundDoubleImpl>>();
1421
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt64Impl>>();
1422
8
    factory.register_function<FunctionStringFormatRound<FormatRoundInt128Impl>>();
1423
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMALV2>>>();
1424
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL32>>>();
1425
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL64>>>();
1426
8
    factory.register_function<
1427
8
            FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL128I>>>();
1428
8
    factory.register_function<FunctionStringFormatRound<FormatRoundDecimalImpl<TYPE_DECIMAL256>>>();
1429
8
    // Digest functions.
    factory.register_function<FunctionStringDigestMulti<SM3Sum>>();
1430
8
    factory.register_function<FunctionStringDigestMulti<MD5Sum>>();
1431
8
    factory.register_function<FunctionStringDigestSHA1>();
1432
8
    factory.register_function<FunctionStringDigestSHA2>();
1433
8
    factory.register_function<FunctionReplace<ReplaceImpl, true>>();
1434
8
    factory.register_function<FunctionReplace<ReplaceEmptyImpl, false>>();
1435
8
    factory.register_function<FunctionTranslate>();
1436
8
    factory.register_function<FunctionMask>();
1437
8
    factory.register_function<FunctionMaskPartial<true>>();
1438
8
    factory.register_function<FunctionMaskPartial<false>>();
1439
8
    factory.register_function<FunctionSubReplace<SubReplaceThreeImpl>>();
1440
8
    factory.register_function<FunctionSubReplace<SubReplaceFourImpl>>();
1441
8
    factory.register_function<FunctionOverlay>();
1442
8
    factory.register_function<FunctionStrcmp>();
1443
8
    factory.register_function<FunctionNgramSearch>();
1444
8
    factory.register_function<FunctionXPathString>();
1445
8
    factory.register_function<FunctionCrc32Internal>();
1446
8
    factory.register_function<FunctionMakeSet>();
1447
8
    factory.register_function<FunctionExportSet>();
1448
8
    factory.register_function<FunctionUnicodeNormalize>();
1449
1450
8
    // Aliases. Each call passes a registered type's `name` constant followed
    // by a string literal.
    // NOTE(review): presumably register_alias(name, alias) makes `alias`
    // resolve to the function registered under `name` — confirm against
    // SimpleFunctionFactory, which is declared outside this view.
    factory.register_alias(FunctionLeft::name, "strleft");
1451
8
    factory.register_alias(FunctionRight::name, "strright");
1452
8
    factory.register_alias(SubstringUtil::name, "substr");
1453
8
    factory.register_alias(SubstringUtil::name, "mid");
1454
8
    factory.register_alias(FunctionToLower::name, "lcase");
1455
8
    factory.register_alias(FunctionToUpper::name, "ucase");
1456
8
    factory.register_alias(FunctionStringDigestMulti<MD5Sum>::name, "md5");
1457
8
    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
1458
8
    factory.register_alias(FunctionStringDigestMulti<SM3Sum>::name, "sm3");
1459
8
    factory.register_alias(FunctionStringDigestSHA1::name, "sha");
1460
8
    factory.register_alias(FunctionStringLocatePos::name, "position");
1461
8
    factory.register_alias(FunctionStringLength::name, "octet_length");
1462
8
    factory.register_alias(FunctionOverlay::name, "insert");
1463
8
}
1464
1465
} // namespace doris